Appendix Tables and Figures
================

-   <a href="#table-s1-demographic-statistics-by-sample"
    id="toc-table-s1-demographic-statistics-by-sample">Table S1: Demographic
    statistics by sample</a>
-   <a
    href="#figure-s1-total-participants-with-browser-activity-data-over-time"
    id="toc-figure-s1-total-participants-with-browser-activity-data-over-time">Figure
    S1: Total participants with browser activity data over time</a>
-   <a href="#figure-s2-coonsumption-levels-over-time-by-channel-type"
    id="toc-figure-s2-coonsumption-levels-over-time-by-channel-type">Figure
    S2: Coonsumption levels over time by channel type</a>
-   <a
    href="#figure-s3-and-s4-youtube-video-diets-of-individuals-who-viewed-any-alternativeextremist-channel-video"
    id="toc-figure-s3-and-s4-youtube-video-diets-of-individuals-who-viewed-any-alternativeextremist-channel-video">Figure
    S3 and S4: YouTube video diets of individuals who viewed any
    alternative/extremist channel video</a>
-   <a
    href="#figure-s5-youtube-video-diets-of-alternative-and-extremist-superconsumers"
    id="toc-figure-s5-youtube-video-diets-of-alternative-and-extremist-superconsumers">Figure
    S5. YouTube video diets of alternative and extremist superconsumers</a>
-   <a href="#figure-s6-recommendation-follows-by-video-channel-type"
    id="toc-figure-s6-recommendation-follows-by-video-channel-type">Figure
    S6. Recommendation follows by video channel type</a>
-   <a
    href="#figure-s7-pages-viewed-immediately-prior-to-youtube-videos-by-channel-type"
    id="toc-figure-s7-pages-viewed-immediately-prior-to-youtube-videos-by-channel-type">Figure
    S7: Pages viewed immediately prior to YouTube videos by channel type</a>
-   <a
    href="#figure-s10-and-table-s5-correlates-of-youtube-video-exposure-by-channel-type-with-party-controls"
    id="toc-figure-s10-and-table-s5-correlates-of-youtube-video-exposure-by-channel-type-with-party-controls">Figure
    S10 and Table S5: Correlates of YouTube video exposure by channel type
    (with party controls)</a>
    -   <a
        href="#tables-s6-and-s7-separating-hostile-sexism-and-racial-resentment-specifications"
        id="toc-tables-s6-and-s7-separating-hostile-sexism-and-racial-resentment-specifications">Tables
        S6 and S7: Separating Hostile sexism and Racial resentment
        specifications</a>
    -   <a
        href="#figure-s11-and-table-s8-correlates-of-exposure-to-youtube-videos-by-channel-type-alternate-racial-attitude-measure"
        id="toc-figure-s11-and-table-s8-correlates-of-exposure-to-youtube-videos-by-channel-type-alternate-racial-attitude-measure">Figure
        S11 and Table S8: Correlates of exposure to YouTube videos by channel
        type (alternate racial attitude measure)</a>
    -   <a href="#factor-loadings" id="toc-factor-loadings">Factor loadings</a>
    -   <a href="#correlation-between-racial-and-gender-resentment"
        id="toc-correlation-between-racial-and-gender-resentment">Correlation
        between racial and gender resentment</a>
    -   <a href="#model-with-combined-score"
        id="toc-model-with-combined-score">Model with combined score</a>
-   <a
    href="#figure-s8-and-table-s3-zero-inflated-models-on-correlates-of-time-on-youtube-video-by-channel-type"
    id="toc-figure-s8-and-table-s3-zero-inflated-models-on-correlates-of-time-on-youtube-video-by-channel-type">Figure
    S8 and Table S3: Zero-inflated models on correlates of time on YouTube
    video by channel type</a>
-   <a
    href="#figure-s9-and-table-s4-correlates-of-youtube-video-views-by-channel-type"
    id="toc-figure-s9-and-table-s4-correlates-of-youtube-video-views-by-channel-type">Figure
    S9 and Table S4: Correlates of YouTube video views by channel type</a>
-   <a href="#figure-s12-correlation-between-browser-history-and-activity"
    id="toc-figure-s12-correlation-between-browser-history-and-activity">Figure
    S12: Correlation between browser history and activity</a>
-   <a href="#table-s9-differential-browsing-behavior-after-install"
    id="toc-table-s9-differential-browsing-behavior-after-install">Table S9:
    Differential browsing behavior after install</a>
-   <a
    href="#figure-s13-differences-in-perceptions-of-youtube-between-full-sample-and-extension-sample"
    id="toc-figure-s13-differences-in-perceptions-of-youtube-between-full-sample-and-extension-sample">Figure
    S13: Differences in perceptions of YouTube between full sample and
    extension sample</a>
-   <a
    href="#figure-s14-percentage-of-views-to-each-channel-type-by-video-number-within-session"
    id="toc-figure-s14-percentage-of-views-to-each-channel-type-by-video-number-within-session">Figure
    S14: Percentage of views to each channel type by video number within
    session</a>
-   <a
    href="#figure-s15-percentage-of-views-to-each-channel-type-by-total-session-length"
    id="toc-figure-s15-percentage-of-views-to-each-channel-type-by-total-session-length">Figure
    S15: Percentage of views to each channel type by total session
    length</a>
-   <a
    href="#table-s10-external-referrers-to-alternative-and-extremist-channel-videos"
    id="toc-table-s10-external-referrers-to-alternative-and-extremist-channel-videos">Table
    S10: External referrers to alternative and extremist channel videos</a>
-   <a
    href="#figure-s16-concentration-of-exposure-to-alternative-and-extremist-channels-view-counts"
    id="toc-figure-s16-concentration-of-exposure-to-alternative-and-extremist-channels-view-counts">Figure
    S16: Concentration of exposure to alternative and extremist channels
    (view counts)</a>

``` r
### ----------- data
# survey + extension data for all participants
merged_data <- read_rds("data/yg_browser_cces_merged.rds")
# activity data
activity_data <- read_rds("data/activity_yg_cces.rds") 
# browser history data
browser_history_data <-read_rds("data/browser_history_yg_cces.rds")

# over time activity data, averages by days and channel type
day_count_averages <- read_csv( "data/day_count_averages.csv")
day_time_averages <- read_csv( "data/day_time_averages.csv")

# referrers data (see build.R for construction of this table)
on_platform_referrers_by_channel <- 
  read_csv('data/on_platform_referrers_by_channel_wtd.csv')

# recommendations data
recs_data <- 
  read_delim("data/recommendation_pipeline_wtd.tsv", delim = '\t')

# color palette 
color_palette <- c("#FFA500", "#CD5C5C", "#015CB9", "#E3E6E6")
```

``` r
source("helper_fxns.R") # these are mostly for plotting and presentation
```

## Table S1: Demographic statistics by sample

``` r
# select variables of interest
demo_vars <- c(
  'gender',
  'race',
  'presvote16post',
  'employ',
  'educ2',
  'religion',
  'marital',
  'faminc_new',
  'pid3',
  'age_group'
)
demographics_data <- merged_data %>%
  dplyr::select(weight_cmd, all_of(demo_vars), browser_sample) %>%
  fastDummies::dummy_cols(select_columns = all_of(demo_vars))
# level of each variable
var_levels <- demographics_data %>% 
  dplyr::select(-demo_vars, -weight_cmd, -browser_sample) %>% 
  names()

# function to calculate (un)weighted stats depending on sample
weighted_stats_fxn <- function (var, sample) {
  require(TAM)
  require(diagis)
  if (sample == "full") {
    d <- # take non-null in variable
      demographics_data[complete.cases(demographics_data[[var]], demographics_data$weight_cmd), , drop =
                          FALSE][[var]]
    w <-
      demographics_data$weight_cmd[complete.cases(demographics_data[[var]],  demographics_data$weight_cmd)]
  } else if (sample == "browser") {
    d <- # non-Nulls in extension sample
      demographics_data[complete.cases(demographics_data[[var]], demographics_data$weight_cmd) &
                          demographics_data$browser_sample == "browser", , drop = FALSE][[var]]
    w <-
      demographics_data$weight_cmd[complete.cases(demographics_data[[var]],  demographics_data$weight_cmd) &
                                     demographics_data$browser_sample == "browser"]
  }
  out <- data.frame(
    "variable" = str_split(string = var, pattern = "_(?=[^_]+$)")[[1]][1] ,
    "level" = str_split(string = var, pattern = "_(?=[^_]+$)")[[1]][2] ,
    "n" = length(d),
    "unweighted_mean" = mean(d, na.rm = T),
    "weighted_mean" = TAM::weighted_mean(d, w = w),
    "unweighted_se" = sd(d, na.rm = T) / sqrt(length(d)),
    "weighted_se" = diagis::weighted_se(d, w = w, na.rm = TRUE),
    "sample" = sample
  )
  return(out)
}

# function to format table
custom_kable_weighted <- function(data, 
                                  col_names = c("", "Weighted", "Unweighted")) {
  kbl(
    data %>% dplyr::select(-variable),
    booktabs = TRUE,
    caption = "Full and extension sample demographics",
    format = "html",
    align = "c",
    digits = 2,
    linesep = "",
    col.names = col_names
  )  %>%
    kable_styling(latex_options =  "HOLD_position",
                  font_size = 10,
                  #bootstrap_options = c("striped","condensed"),
                  stripe_index = c(1, 5, 13, 17, 21, 29, 35, 41, 51, 57)) %>%
    group_rows("Gender", 1, 4,
               background = "gray!6",
               latex_gap_space = "0em") %>%
    group_rows("Race", 5, 8,
               background = "gray!6",
               latex_gap_space = "0em") %>%
    group_rows("2016 presidential vote", 9, 12,
               background = "gray!6",
               latex_gap_space = "0em") %>%
    group_rows("Employment status", 13, 16,
               background = "gray!6",
               latex_gap_space = "0em") %>%
    group_rows("Education", 16, 24,
               background = "gray!6",
               latex_gap_space = "0em") %>%
    group_rows("Religion", 25, 30,
               background = "gray!6",
               latex_gap_space = "0em") %>%
    group_rows("Marital status", 31, 36,
               background = "gray!6",
               latex_gap_space = "0em") %>%
    group_rows("Family income", 37, 46,
               background = "gray!6",
               latex_gap_space = "0em") %>%
    group_rows("Party identification", 47, 52,
               background = "gray!6",
               latex_gap_space = "0em") %>%
    group_rows("Age", 53, 60,
               background = "gray!6",
               latex_gap_space = "0em") %>% 
    footnote(
      c("Weighted estimates use YouGov survey weights.",
        "Standard errors are in parentheses."))%>%
    row_spec(61, extra_latex_after = "\\cline{3-4}")%>%
    row_spec(62, hline_after = TRUE)
}
```

``` r
# get (un)weighted estimates for extension sample
browser_sample <-
  map(var_levels, ~weighted_stats_fxn(.x, sample = "browser"))
# get (un)weighted estimates for full survey sample
full_sample <-
  map(var_levels, ~weighted_stats_fxn(.x, sample = "full"))

# merge full and extension estimates together
demographic_stats_table <- browser_sample %>%
  bind_rows() %>%
  left_join(
    bind_rows(full_sample),
    by = c("variable", "level"),
    suffix = c(".browser", ".full")
  ) %>%
  mutate_at(
    vars(
      starts_with("unweighted_"),
      starts_with("weighted_")
    ),
    list( ~ format(round(., 2), nsmall = 2) )
  )

# number of respondents in survey sample
n_full <- 
  demographics_data %>% 
  nrow()
# number of respondents in extension sample
n_browser <- 
  demographics_data %>% 
  filter(browser_sample == "browser") %>% 
  nrow()
```

``` r
# reformat 
full_summ <- demographic_stats_table %>%
  dplyr::select(
    variable,
    level,
    weighted_mean.full,
    weighted_se.full,
    unweighted_mean.full,
    unweighted_se.full
  ) %>%
  mutate(
    unweighted_se.full = paste0("(", as.character(unweighted_se.full), ")"),
    weighted_se.full = paste0("(", as.character(weighted_se.full), ")"),
    weighted_mean.full = as.character(weighted_mean.full),
    unweighted_mean.full  = as.character(unweighted_mean.full)
  ) %>%
  pivot_longer(c(
    weighted_mean.full,
    weighted_se.full,
    unweighted_mean.full,
    unweighted_se.full
  )) %>%
  mutate(weighted = ifelse(str_detect(name, "unweighted"), "unweighted", "weighted")) %>%
  pivot_wider(-c(value), weighted) %>%
  mutate(
    weighted = ifelse(is.na(weighted), lag(weighted, n = 2L), weighted),
    unweighted = ifelse(is.na(unweighted), lead(unweighted, n = 2L), unweighted)
  ) %>%
  filter(str_detect(name, "unweighted")) %>% 
  filter(level != "NA" & level != "Other") %>% 
  select(-name) %>%
  mutate(level = ifelse(1:length(.$level) %% 2 == 0, "", level)) %>%
  add_row(
    variable = "",
    level = "",
    weighted = paste0("n = ", n_full),
    unweighted = paste0("n = ", n_full)
  ) 

# extension sample
browser_summ <- demographic_stats_table %>%
  dplyr::select(
    variable,
    level,
    weighted_mean.browser,
    weighted_se.browser,
    unweighted_mean.browser,
    unweighted_se.browser
  ) %>%
  mutate(
    unweighted_se.browser = paste0("(", as.character(unweighted_se.browser), ")"),
    weighted_se.browser = paste0("(", as.character(weighted_se.browser), ")"),
    weighted_mean.browser = as.character(weighted_mean.browser),
    unweighted_mean.browser  = as.character(unweighted_mean.browser)
  ) %>%
  pivot_longer(
    c(
      weighted_mean.browser,
      weighted_se.browser,
      unweighted_mean.browser,
      unweighted_se.browser
    )
  ) %>%
  mutate(weighted = ifelse(str_detect(name, "unweighted"), "unweighted", "weighted")) %>%
  pivot_wider(-c(value), weighted) %>%
  mutate(
    weighted = ifelse(is.na(weighted), lag(weighted, n = 2L), weighted),
    unweighted = ifelse(is.na(unweighted), lead(unweighted, n = 2L), unweighted)
  ) %>%
  filter(str_detect(name, "unweighted")) %>%
  filter(level != "NA" & level != "Other") %>% 
  select(-name) %>%
  mutate(level = ifelse(1:length(.$level) %% 2 == 0, "", level)) %>%
  add_row(
    variable = "",
    level = "",
    weighted = paste0("n = ", n_browser),
    unweighted = paste0("n = ", n_browser)
  )

# put full and extension sample tables together...
combined_table <- full_summ %>%
  rename(full_weighted = weighted,
         full_unweighted = unweighted) %>%
  bind_cols(
    browser_summ %>%
      rename(
        extension_weighted = weighted,
        extension_unweighted = unweighted
      ) %>% 
      select(-variable, -level)
  )

# print table
custom_kable_weighted(combined_table,
                      col_names = c("", "Full weighted", "Full unweighted", 
                                    "Extension weighted", "Extension unweighted"))
```

<table class="table" style="font-size: 10px; margin-left: auto; margin-right: auto;">
<caption style="font-size: initial !important;">
Full and extension sample demographics
</caption>
<thead>
<tr>
<th style="text-align:center;">
</th>
<th style="text-align:center;">
Full weighted
</th>
<th style="text-align:center;">
Full unweighted
</th>
<th style="text-align:center;">
Extension weighted
</th>
<th style="text-align:center;">
Extension unweighted
</th>
</tr>
</thead>
<tbody>
<tr grouplength="4">
<td colspan="5" style="border-bottom: 1px solid;background-color: gray!6 !important;">
<strong>Gender</strong>
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
Female
</td>
<td style="text-align:center;">
0.48
</td>
<td style="text-align:center;">
0.46
</td>
<td style="text-align:center;">
0.49
</td>
<td style="text-align:center;">
0.49
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
Male
</td>
<td style="text-align:center;">
0.52
</td>
<td style="text-align:center;">
0.54
</td>
<td style="text-align:center;">
0.51
</td>
<td style="text-align:center;">
0.51
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr grouplength="4">
<td colspan="5" style="border-bottom: 1px solid;background-color: gray!6 !important;">
<strong>Race</strong>
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
White
</td>
<td style="text-align:center;">
0.68
</td>
<td style="text-align:center;">
0.76
</td>
<td style="text-align:center;">
0.69
</td>
<td style="text-align:center;">
0.75
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
Non-white
</td>
<td style="text-align:center;">
0.32
</td>
<td style="text-align:center;">
0.24
</td>
<td style="text-align:center;">
0.31
</td>
<td style="text-align:center;">
0.25
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr grouplength="4">
<td colspan="5" style="border-bottom: 1px solid;background-color: gray!6 !important;">
<strong>2016 presidential vote</strong>
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
Donald Trump
</td>
<td style="text-align:center;">
0.33
</td>
<td style="text-align:center;">
0.40
</td>
<td style="text-align:center;">
0.19
</td>
<td style="text-align:center;">
0.20
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
Hillary Clinton
</td>
<td style="text-align:center;">
0.28
</td>
<td style="text-align:center;">
0.31
</td>
<td style="text-align:center;">
0.40
</td>
<td style="text-align:center;">
0.49
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr grouplength="4">
<td colspan="5" style="border-bottom: 1px solid;background-color: gray!6 !important;">
<strong>Employment status</strong>
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
Employed
</td>
<td style="text-align:center;">
0.46
</td>
<td style="text-align:center;">
0.49
</td>
<td style="text-align:center;">
0.48
</td>
<td style="text-align:center;">
0.51
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
Unemployed
</td>
<td style="text-align:center;">
0.12
</td>
<td style="text-align:center;">
0.10
</td>
<td style="text-align:center;">
0.12
</td>
<td style="text-align:center;">
0.10
</td>
</tr>
<tr grouplength="9">
<td colspan="5" style="border-bottom: 1px solid;background-color: gray!6 !important;">
<strong>Education</strong>
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="2">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.00)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
High school graduate
</td>
<td style="text-align:center;">
0.35
</td>
<td style="text-align:center;">
0.19
</td>
<td style="text-align:center;">
0.26
</td>
<td style="text-align:center;">
0.14
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
Some college
</td>
<td style="text-align:center;">
0.35
</td>
<td style="text-align:center;">
0.37
</td>
<td style="text-align:center;">
0.37
</td>
<td style="text-align:center;">
0.35
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
4-year
</td>
<td style="text-align:center;">
0.19
</td>
<td style="text-align:center;">
0.26
</td>
<td style="text-align:center;">
0.24
</td>
<td style="text-align:center;">
0.28
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
Post-grad
</td>
<td style="text-align:center;">
0.11
</td>
<td style="text-align:center;">
0.18
</td>
<td style="text-align:center;">
0.13
</td>
<td style="text-align:center;">
0.23
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr grouplength="6">
<td colspan="5" style="border-bottom: 1px solid;background-color: gray!6 !important;">
<strong>Religion</strong>
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
Atheist/Agnostic
</td>
<td style="text-align:center;">
0.37
</td>
<td style="text-align:center;">
0.35
</td>
<td style="text-align:center;">
0.47
</td>
<td style="text-align:center;">
0.46
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
Protestant
</td>
<td style="text-align:center;">
0.32
</td>
<td style="text-align:center;">
0.34
</td>
<td style="text-align:center;">
0.26
</td>
<td style="text-align:center;">
0.27
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
Roman Catholic
</td>
<td style="text-align:center;">
0.18
</td>
<td style="text-align:center;">
0.18
</td>
<td style="text-align:center;">
0.15
</td>
<td style="text-align:center;">
0.14
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr grouplength="6">
<td colspan="5" style="border-bottom: 1px solid;background-color: gray!6 !important;">
<strong>Marital status</strong>
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
Divorced
</td>
<td style="text-align:center;">
0.11
</td>
<td style="text-align:center;">
0.12
</td>
<td style="text-align:center;">
0.10
</td>
<td style="text-align:center;">
0.12
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
Married
</td>
<td style="text-align:center;">
0.43
</td>
<td style="text-align:center;">
0.53
</td>
<td style="text-align:center;">
0.39
</td>
<td style="text-align:center;">
0.48
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
Never married
</td>
<td style="text-align:center;">
0.35
</td>
<td style="text-align:center;">
0.26
</td>
<td style="text-align:center;">
0.39
</td>
<td style="text-align:center;">
0.30
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr grouplength="10">
<td colspan="5" style="border-bottom: 1px solid;background-color: gray!6 !important;">
<strong>Family income</strong>
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
\< $10,000
</td>
<td style="text-align:center;">
0.07
</td>
<td style="text-align:center;">
0.05
</td>
<td style="text-align:center;">
0.08
</td>
<td style="text-align:center;">
0.05
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.00)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
$10,000 - $29,999
</td>
<td style="text-align:center;">
0.21
</td>
<td style="text-align:center;">
0.16
</td>
<td style="text-align:center;">
0.22
</td>
<td style="text-align:center;">
0.18
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
$30,000 - $69,999
</td>
<td style="text-align:center;">
0.36
</td>
<td style="text-align:center;">
0.36
</td>
<td style="text-align:center;">
0.36
</td>
<td style="text-align:center;">
0.35
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
$70,000 - $119,999
</td>
<td style="text-align:center;">
0.24
</td>
<td style="text-align:center;">
0.27
</td>
<td style="text-align:center;">
0.23
</td>
<td style="text-align:center;">
0.26
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
\> $120,000
</td>
<td style="text-align:center;">
0.11
</td>
<td style="text-align:center;">
0.17
</td>
<td style="text-align:center;">
0.12
</td>
<td style="text-align:center;">
0.16
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr grouplength="6">
<td colspan="5" style="border-bottom: 1px solid;background-color: gray!6 !important;">
<strong>Party identification</strong>
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
Democrat
</td>
<td style="text-align:center;">
0.37
</td>
<td style="text-align:center;">
0.35
</td>
<td style="text-align:center;">
0.51
</td>
<td style="text-align:center;">
0.54
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
Independent
</td>
<td style="text-align:center;">
0.32
</td>
<td style="text-align:center;">
0.32
</td>
<td style="text-align:center;">
0.29
</td>
<td style="text-align:center;">
0.28
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
Republican
</td>
<td style="text-align:center;">
0.31
</td>
<td style="text-align:center;">
0.33
</td>
<td style="text-align:center;">
0.20
</td>
<td style="text-align:center;">
0.18
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr grouplength="8">
<td colspan="5" style="border-bottom: 1px solid;background-color: gray!6 !important;">
<strong>Age</strong>
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
18-34
</td>
<td style="text-align:center;">
0.27
</td>
<td style="text-align:center;">
0.16
</td>
<td style="text-align:center;">
0.33
</td>
<td style="text-align:center;">
0.21
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
35-54
</td>
<td style="text-align:center;">
0.33
</td>
<td style="text-align:center;">
0.34
</td>
<td style="text-align:center;">
0.31
</td>
<td style="text-align:center;">
0.37
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
55-64
</td>
<td style="text-align:center;">
0.18
</td>
<td style="text-align:center;">
0.23
</td>
<td style="text-align:center;">
0.18
</td>
<td style="text-align:center;">
0.24
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
65+
</td>
<td style="text-align:center;">
0.21
</td>
<td style="text-align:center;">
0.27
</td>
<td style="text-align:center;">
0.18
</td>
<td style="text-align:center;">
0.19
</td>
</tr>
<tr>
<td style="text-align:center; padding-left:  2em;" indentlevel="1">
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.01)
</td>
<td style="text-align:center;">
(0.02)
</td>
<td style="text-align:center;">
(0.01)
</td>
</tr>
<tr>
<td style="text-align:center;">
</td>
<td style="text-align:center;">
n = 4000
</td>
<td style="text-align:center;">
n = 4000
</td>
<td style="text-align:center;">
n = 1236
</td>
<td style="text-align:center;">
n = 1236
</td>
</tr>
</tbody>
<tfoot>
<tr>
<td style="padding: 0; border: 0;" colspan="100%">
<span style="font-style: italic;">Note: </span>
</td>
</tr>
<tr>
<td style="padding: 0; border: 0;" colspan="100%">
<sup></sup> Weighted estimates use YouGov survey weights.
</td>
</tr>
<tr>
<td style="padding: 0; border: 0;" colspan="100%">
<sup></sup> Standard errors are in parentheses.
</td>
</tr>
</tfoot>
</table>

## Figure S1: Total participants with browser activity data over time

``` r
# study period
date_range <- as.Date(c("2020-07-21", "2020-12-31"))
# all dates in this period
date_vector <-
  as.Date(seq(date_range[1], date_range[2], by = "day"))

# count number of unique individuals per day
users_by_mat <-
  matrix(
    data = 0,
    nrow = nrow(activity_data),
    ncol = length(date_vector)
  )
for (i in 1:nrow(activity_data)) {
  start_id <-
    which(date_vector == as.Date(activity_data[i, ]$activity_start_date))
  end_id <-
    which(date_vector == as.Date(activity_data[i, ]$activity_end_date))
  # interpolate between start and end date
  users_by_mat[i, start_id:end_id] <- activity_data[i, ]$weight_cmd
}

# tally total number for each day
n_users_by_date <- tibble(date = date_vector,
                          n_activity_users = colSums(users_by_mat))

# plot
n_users_by_date %>%
  ggplot(aes(x = date, y = n_activity_users)) +
  geom_col(alpha = .8,
           position = position_dodge2(padding = .1)) +
  labs(x = "", y = "Number of participants") +
  theme_minimal()
```

<img src="appendix-figures_files/figure-gfm/attrition-1.png" style="display: block; margin: auto;" />

``` r
ggsave('./appendix-figures_files/attrition_wtd.png',
      dpi = 600, width = 10, height = 6)
ggsave('./appendix-figures_files/attrition_wtd.pdf',
      dpi = 600, width = 10, height = 6)
```

Distribution of time in study

``` r
# average number of days in study
ggplot(tibble(days_enrolled = rowSums(users_by_mat))) +
  geom_histogram(aes(x = days_enrolled),
                 alpha = .8,
                 bins = length(date_vector)/2) +
  geom_vline(xintercept = median(rowSums(users_by_mat)),
             color = "blue") +
  labs(x = "Days enrolled in study",
       y = "Number of participants") +
  annotate(geom = "text", 
           label = paste0("median = ", median(rowSums(users_by_mat)), " days"),
           angle = 90,
           color = "blue",
           x =  median(rowSums(users_by_mat)) - 2,
           y = 150) +
  theme_minimal()
```

<img src="appendix-figures_files/figure-gfm/unnamed-chunk-2-1.png" style="display: block; margin: auto;" />

## Figure S2: Coonsumption levels over time by channel type

``` r
# calculate centered moving averages for each channel type
# views
ma_count <- day_count_averages %>%
  group_by(source) %>%
  mutate(
    ma7_time = rollmean(avg_count_day, k = 7, fill = NA),
    ma14_time = rollmean(avg_count_day, k = 14, fill = NA),
    ma21_time = rollmean(avg_count_day, k = 21, fill = NA),
    ln_time = log(avg_count_day),
    ln_ma14_time = rollapplyr(
      ln_time,
      14,
      mean,
      na.rm = TRUE,
      by = 1,
      partial = TRUE,
      fill = NA
    )
  )
# time elapsed
ma_time <- day_time_averages %>%
  group_by(source) %>%
  mutate(
    ma7_time = rollmean(avg_time_day, k = 7, fill = NA),
    ma14_time = rollmean(avg_time_day, k = 14, fill = NA),
    ma21_time = rollmean(avg_time_day, k = 21, fill = NA),
    ln_time = log(avg_time_day),
    ln_ma14_time = rollapplyr(
      ln_time,
      14,
      mean,
      na.rm = TRUE,
      by = 1,
      partial = TRUE,
      fill = NA
    )
  )

# plots
sma_count_plot <- ma_count %>%
  mutate(source = recode_channel_type_fxn(source)) %>%
  ggplot(aes(x = full_date, y = avg_count_day,
             color = source)) +
  geom_point(alpha = .6) +
  geom_line(
    data =  . %>% filter((month(full_date) >= 8 &
                            lubridate::day(full_date) >= 15) |  month(full_date) >= 9),
    aes(y = ma21_time,
        color = source),
    size = 1
  ) +
  scale_color_manual(
    values = c(
      "Alternative \nchannel" = "#FFA500",
      "Extremist \nchannel" = "#CD5C5C",
      "Mainstream media \nchannel" = "#015CB9",
      "Other \nchannel" = "#E3E6E6"
    ),
    name = ""
  ) +
  labs(x = "",
       y = "Average number of visits to\nchannel videos")  +
  theme_minimal() +
  theme(legend.position = "bottom") +
  guides(linetype = guide_legend(""))

sma_time_plot <- ma_time %>%
  mutate(source = recode_channel_type_fxn(source)) %>%
  ggplot(aes(x = full_date, y = avg_time_day,
             color = source)) +
  geom_point(alpha = .6) +
  geom_line(
    data = . %>% filter((
      month(full_date) >= 8 &
        lubridate::day(full_date) >= 15
    ) |  month(full_date) >= 9),
    aes(y = ma21_time,
        color = source),
    size = 1
  ) +
  scale_color_manual(
    values = c(
      "Alternative \nchannel" = "#FFA500",
      "Extremist \nchannel" = "#CD5C5C",
      "Mainstream media \nchannel" = "#015CB9",
      "Other \nchannel" = "#E3E6E6"
    ),
    name = ""
  ) +
  labs(x = "",
       y = "Average minutes spent on\nchannel videos")  +
  theme_minimal() +
  theme(legend.position = "bottom") +
  guides(linetype = guide_legend(""))


sma_count_plot + sma_time_plot + plot_layout(ncol=1,heights=c(2,1))
```

<img src="appendix-figures_files/figure-gfm/unnamed-chunk-3-1.png" style="display: block; margin: auto;" />

``` r
ggsave('./appendix-figures_files/trend_by_channel_type_sma.png',
      dpi = 600, width = 10, height = 14)
ggsave('./appendix-figures_files/trend_by_channel_type_sma.pdf',
      dpi = 600, width = 10, height = 14)
```

## Figure S3 and S4: YouTube video diets of individuals who viewed any alternative/extremist channel video

Distribution of time spent per week on channels for anyone who watched
an alternative (extremist) channel video over the course of the study.
Bars are in descending order of most to least time on alternative
(extremist) channel videos.

``` r
#cumulative time spent on that channel and the cumulative user 
cumsum_fxn <- function (var, data) {
  channel_type <-
      str_replace(str_extract(var, "[a-z]+_all_week$"), "_all_week", "")
  data %>%
    arrange(desc(.data[[var]])) %>%
    mutate(
      users = weight_cmd / sum(.$weight_cmd, na.rm = T),
      views = (.data[[var]] / sum(.data[[var]], na.rm = T)),
      cum_user = cumsum(users),
      cum_views = cumsum(views),
      ln_cum_user = log10(cumsum(users)),
      ln_cum_views = log10(cumsum(views)),
      source = channel_type
    ) %>%
    select(cum_user, cum_views, ln_cum_user, source, caseid, weight_cmd)
}

# dependent variables (time spent per week) by channel type
minutes_activity_time_all_week <- c(
    "minutes_activity_yt_video_time_elapsed_capped_total_alternative_all_week",
    "minutes_activity_yt_video_time_elapsed_capped_total_extremist_all_week",
    "minutes_activity_yt_video_time_elapsed_capped_total_mainstream_all_week",
    "minutes_activity_yt_video_time_elapsed_capped_total_other_all_week"
  )

# calculate for all channel types
concentration_time_user <- map_dfr(minutes_activity_time_all_week, 
                            ~cumsum_fxn(.x, activity_data))

# join with rest of survey data
activity_data_supers <- activity_data %>%
  left_join(
    concentration_time_user %>%
      select(caseid, cum_views,  source) %>%
      pivot_wider(
        names_from = "source",
        names_glue = "cumsum_{source}",
        values_from = "cum_views"
      ),
    by = "caseid"
  ) %>%
  # define a superconsumer as someone who is in the top 80th percentile of watch time per week
  mutate(
    super_alternative = if_else(cumsum_alternative <= .8, 1, 0),
    super_extremist = if_else(cumsum_extremist <= .8, 1, 0),
    super_mainstream = if_else(cumsum_mainstream <= .8, 1, 0)
  )


# anyone who watched at least one alternative
top_time_all_weeks_most_any_alt <- activity_data_supers  %>% 
  filter(at_alt == 1)  %>% 
  arrange(desc(minutes_activity_yt_video_time_elapsed_capped_total_alternative_all_week)) %>% 
  mutate(rank = 1:nrow(.)) %>% 
  pivot_longer(cols = all_of(minutes_activity_time_all_week)) %>% 
  mutate(channel_type = str_extract(str_replace(name, "_all_week$", ""), "[a-z]+$"),
         minutes_value = value,
         image = "https://i.postimg.cc/bwL3hjPY/user-icon-extremist.png" ) 

# anyone who watched at least one extremist
top_time_all_weeks_most_any_ext <- activity_data_supers  %>% 
  filter(at_ext == 1)  %>% 
  arrange(desc(minutes_activity_yt_video_time_elapsed_capped_total_extremist_all_week)) %>% 
  mutate(rank = 1:nrow(.)) %>% 
  pivot_longer(cols = all_of(minutes_activity_time_all_week)) %>% 
  mutate(channel_type = str_extract(str_replace(name, "_all_week$", ""), "[a-z]+$"),
         minutes_value = value,
         image = "https://i.postimg.cc/D0BPKrVj/user-icon-alternative.png" )

topuser_plot(
    data = top_time_all_weeks_most_any_alt,
    figure_size = .02,
    figure_space = -100,
    channel_type = "Alternative",
    title = "Particpants who watched any alternative channel video",
    ylabel = "Minutes per week on YouTube videos",
    y_limit = 3e3
  )
```

<img src="appendix-figures_files/figure-gfm/unnamed-chunk-4-1.png" style="display: block; margin: auto;" />

``` r
ggsave('./appendix-figures_files/any_alternative_video_diet_time_elapsed_week.png',
      dpi = 600, width = 10, height = 6)
ggsave('./appendix-figures_files/any_alternative_video_diet_time_elapsed_week.pdf',
      dpi = 600, width = 10, height = 6)

topuser_plot(
    data = top_time_all_weeks_most_any_ext,
    figure_size = .02,
    figure_space = -160,
    channel_type = "Extremist",
    title = "Particpants who watched any extremist channel video",
    ylabel = "Minutes per week on YouTube videos",
    y_limit = 3e3
  )
```

<img src="appendix-figures_files/figure-gfm/unnamed-chunk-4-2.png" style="display: block; margin: auto;" />

``` r
ggsave('./appendix-figures_files/any_extremist_video_diet_time_elapsed_week.png',
      dpi = 600, width = 10, height = 6)
ggsave('./appendix-figures_files/any_extremist_video_diet_time_elapsed_week.pdf',
      dpi = 600, width = 10, height = 6)
```

## Figure S5. YouTube video diets of alternative and extremist superconsumers

Distribution of time spent per week on channels for alternative
(extremist) superconsumers. Superconsumers are individuals who make up
the top 80th percentile of watch time for the given channel type. Bars
are in descending order of most to least time on alternative (extremist)
channel videos.

``` r
# join with rest of survey data
activity_data_supers <- activity_data %>%
  left_join(
    concentration_time_user %>%
      select(caseid, cum_views,  source) %>%
      pivot_wider(
        names_from = "source",
        names_glue = "cumsum_{source}",
        values_from = "cum_views"
      ),
    by = "caseid"
  ) %>%
  # define a superconsumer as someone who is in the top 80th percentile of watch time per week
  mutate(
    super_alternative = if_else(cumsum_alternative <= .8, 1, 0),
    super_extremist = if_else(cumsum_extremist <= .8, 1, 0),
    super_mainstream = if_else(cumsum_mainstream <= .8, 1, 0)
  )


# select the top alternative super consumers
top_time_all_weeks_most_alt <- activity_data_supers  %>%
  filter(super_alternative == 1)  %>%
  arrange(desc(
    minutes_activity_yt_video_time_elapsed_capped_total_alternative_all_week
  )) %>%
  mutate(rank = 1:nrow(.)) %>%
  pivot_longer(cols = all_of(minutes_activity_time_all_week)) %>%
  mutate(
    channel_type = str_replace(name, "_all_week$", ""),
    channel_type = str_extract(channel_type, "[a-z]+$"),
    minutes_value = value,
    image = if_else(
      super_extremist == 1 ,
      "https://i.postimg.cc/bwL3hjPY/user-icon-extremist.png",
      NA_character_
    )
  )

# select the top extremist super consumers
top_time_all_weeks_most_ext <- activity_data_supers  %>%
  filter(super_extremist == 1)  %>%
  arrange(desc(
    minutes_activity_yt_video_time_elapsed_capped_total_extremist_all_week
  )) %>%
  mutate(rank = 1:nrow(.)) %>%
  pivot_longer(cols = all_of(minutes_activity_time_all_week)) %>%
  mutate(
    channel_type = str_replace(name, "_all_week$", ""),
    channel_type = str_extract(channel_type, "[a-z]+$"),
    minutes_value = value,
    image = if_else(
      super_alternative == 1 ,
      "https://i.postimg.cc/D0BPKrVj/user-icon-alternative.png",
      NA_character_
    )
  )

alternative_superconsumers <- activity_data_supers %>% 
  filter(super_alternative == 1) 
extemist_superconsumers <- activity_data_supers %>% 
  filter(super_extremist == 1) 

# alternative superconsumers
alternative_superconsumers_weeks_plot <-
  topuser_plot(
    data = top_time_all_weeks_most_alt,
    figure_space = -160,
    figure_size = .075,
    channel_type = "Alternative",
    title = "Alternative channel superconsumers",
    ylabel = "Minutes per week on YouTube videos",
    y_limit = 3e3
  )
# extremist superconsumers
extremist_superconsumers_weeks_plot <-
  topuser_plot(
    data = top_time_all_weeks_most_ext,
    figure_space = -160,
    figure_size = .075,
    channel_type = "Extremist",
    title = "Extremist channel superconsumers",
    ylabel = "Minutes per week on YouTube videos",
    y_limit = 3e3
  )

# same legend 
common_legend_superconsumers <-
  get_legend(alternative_superconsumers_weeks_plot)

# put panels side by side
plot_grid(
  plot_grid(
    alternative_superconsumers_weeks_plot +
      theme(legend.position = 'none'),
    extremist_superconsumers_weeks_plot +
      theme(legend.position = 'none'),
    labels = c("A", "B"),
    label_size = 16,
    nrow = 1
  ),
  common_legend_superconsumers,
  ncol = 1,
  rel_heights = c(1, .1)
)
```

<img src="appendix-figures_files/figure-gfm/calculate cumulative sums-1.png" style="display: block; margin: auto;" />

``` r
ggsave('./appendix-figures_files/superconsumers_time_elapsed_week.png',
      dpi = 600, width = 12, height = 6)
ggsave('./appendix-figures_files/superconsumers_time_elapsed_week.pdf',
      dpi = 600, width = 12, height = 6)
```

## Figure S6. Recommendation follows by video channel type

``` r
### ===== recommendations followed setup
alternative_followed_table <-
  recs_prop_table_fxn(channel_type = "alternative",
                      statistic = "followed")
extremist_followed_table <-
  recs_prop_table_fxn(channel_type = "extremist",
                      statistic = "followed")
mainstream_followed_table <-
  recs_prop_table_fxn(channel_type = "mainstream",
                      statistic = "followed")
mainstream_followed_table_rounded <- mainstream_followed_table %>%
  mutate(
    prop = case_when(
      variable == "rec_match_alternative_match" ~ .01,
      variable == "rec_match_other_match" ~ 0.295,
      variable == "rec_match_mainstream_match" ~ 0.70,
      TRUE ~ prop
    )
  )
other_followed_table <- 
  recs_prop_table_fxn(channel_type = "other",
                      statistic = "followed")
#outputs to waffle_plot_fxn
alternative_recs_followed <-
  waffle_plot_fxn(alternative_followed_table, statistic = "followed",
                  title = "Alternative channel\nvideos")
extremist_recs_followed <-
  waffle_plot_fxn(extremist_followed_table, statistic = "followed",
                  title = "Extremist channel\nvideos")
mainstream_recs_followed <-
  waffle_plot_fxn(mainstream_followed_table_rounded, statistic = "followed",
                  make_proportional = F,
                  title = "Mainstream media\nchannel videoes")
other_recs_followed <-
  waffle_plot_fxn(other_followed_table, statistic = "followed",
                  title = "Other channel\nvideos")


# bar on top of waffles to show scale
total_recs_followed_table <- recs_data %>%
  filter(variable == "total recs followed") %>%
  pivot_longer(-variable) %>%
  select(-variable) %>%
  mutate(
    overall_percent = 100 * (value / sum(value)),
    cumulative_percent = cumsum(overall_percent)
  )

bar_recs_followed_plot <- scale_plot_fxn(
  data = total_recs_followed_table,
  multiplicative_factor = 8,
  text_positions = c(-3, 4, 12, 100),
  segment_y_lower = 4,
  segment_y_upper = 10,
  statistic = "followed"
)

# create dummy plot for legend
dummy_plot_followed <- alternative_followed_table %>%
  ggplot(aes(fill = factor(followed),
             values = prop * 100))  +
  geom_waffle(
    n_rows = 10,
    size = .5,
    color = "white",
    make_proportional = TRUE,
    radius = unit(5, "pt")
  ) +
  scale_fill_manual(
    values = c(
      'alternative' = "#FFA500",
      'extremist' = "#CD5C5C",
      'mainstream' = "#015CB9",
      'other' = '#E3E6E6'
    ),
    labels = c(
      'Alternative channel\nvideos' ,
      'Extremist channel\nvideos',
      'Mainstream media\nchannel videos',
      'Other channel\nvideos'
    ),
    name = "Recommendations followed to:"
  ) +
  theme(
    plot.title = ggtext::element_markdown(size = 12, hjust = .5),
    legend.position = 'bottom'
  )

common_legend_followed <- lemon::g_legend(dummy_plot_followed)

all_waffles_followed <- gridExtra::arrangeGrob(
  alternative_recs_followed,
  extremist_recs_followed,
  mainstream_recs_followed,
  other_recs_followed,
  nrow = 1
)
```

``` r
recs_followed_grid <- plot_grid(
  plot_grid(
    bar_recs_followed_plot,
    all_waffles_followed,
    rel_heights = c(1.2, 1.5),
    labels = c(
      "A) Percentage of total recommendations followed:",
      "B) Recommendations followed when watching:"
    ),
    label_y = c(.9, 1.1),
    label_size = 18,
    hjust = 0,
    nrow = 2
  ),
  common_legend_followed,
  ncol = 1,
  rel_heights = c(1, .1)
)
recs_followed_grid
```

<img src="appendix-figures_files/figure-gfm/recommendations followed plot-1.png" style="display: block; margin: auto;" />

``` r
ggsave('./appendix-figures_files/waffle_recs_followed_wtd.png',
      dpi = 600, width = 14, height = 8)

ggsave('./appendix-figures_files/waffle_recs_followed_wtd.pdf',
      dpi = 600, width = 14, height = 8)
```

## Figure S7: Pages viewed immediately prior to YouTube videos by channel type

(CODE ONLY, SOURCE DATA IS ON SECURE SERVER. See `build.R` under
“REFERRERS DATA” for construction of
`on_platform_referrers_by_channel.csv`)

``` r
on_platform_referrers_by_channel_plot <-
  on_platform_referrers_by_channel %>%
  mutate(
    channel_type = case_when(
      channel_type == "alternative" ~ "Alternative channel videos",
      channel_type == "extremist" ~ "Extremist channel videos",
      channel_type == "mainstream" ~ "Mainstream media channel videos",
      channel_type == "other" ~ "Other channel videos"
    )
  ) %>%
  ggplot(
    data = .,
    aes(
      x = str_wrap_factor(reorder(channel_type, order_var), width = 17),
      y = percentage,
      fill = youtube_video_referrers_by_channel_type,
      group = channel_type
    )
  ) +
  geom_col(position = position_dodge2(padding = 0,
                                      width = .95)) +
  geom_linerange(aes(ymin = ci_lwr,
                     ymax = ci_upr),
                 size = .7,
                 position = position_dodge2(width = .90)) +
  labs(x = "", y = "Estimated percentage from preceding link") +
  scale_fill_manual(
    values = c(
      'alternative' = "#FFA500",
      'extremist'  = "#CD5C5C",
      'mainstream'  =  "#015CB9",
      'other' = "#E3E6E6",
      'Non-video on-platform'  =  "#7f7f7f",
      'Off-platform'  =  "#2D2E32"
    ),
    breaks  = c(
      'alternative',
      'other',
      'extremist',
      'Non-video on-platform',
      'mainstream',
      'Off-platform'
    ),
    labels = c(
      'alternative' = "Alternative\nchannel videos",
      'extremist' = "Extremist\nchannel videos",
      'mainstream'  =  "Mainstream media\nchannel videos",
      'other' = "Other\nchannel videos",
      'Non-video on-platform'  = "Non-video \non-platform",
      'Off-platform'  = "Off-platform"
    ),
    name = "Type of preceding link:"
  ) +
  scale_y_continuous(
    limits = c(0, 60),
    labels = paste0(seq(0, 60, 20), "%"),
    breaks = seq(0, 60, 20)
  ) +
  ggthemes::theme_gdocs() +
  theme(
    legend.position = 'bottom',
    legend.direction = 'horizontal',
    panel.grid.major.y = element_line(color = "grey95"),
    panel.grid.major.x = element_blank(),
    axis.text.x = element_text(size = 12),
    plot.background = element_rect(color = "white")
  )

on_platform_referrers_by_channel_plot
```

<img src="appendix-figures_files/figure-gfm/unnamed-chunk-5-1.png" style="display: block; margin: auto;" />

``` r
ggsave('./appendix-figures_files/on_platform_referrers_by_channel_wtd.png',
      dpi = 600, width = 16, height = 10)
ggsave('./appendix-figures_files/on_platform_referrers_by_channel_wtd.pdf',
      dpi = 600, width = 16, height = 10)
```

\[TABLE S2: Correlates of time on YouTube videos by channel type\]

## Figure S10 and Table S5: Correlates of YouTube video exposure by channel type (with party controls)

``` r
# with party ID
f_time_pid_alternative_all <-
  formula(
    activity_yt_video_time_elapsed_capped_total_alternative_all ~  rr_cts + jw_cts + fem_cts + age + gender + educ2 + race + pid_lean
  )
f_time_pid_extremist_all <-
  formula(
    activity_yt_video_time_elapsed_capped_total_extremist_all  ~  rr_cts + jw_cts + fem_cts + age + gender + educ2 + race + pid_lean
  )
f_time_pid_mainstream_all <-
  formula(
    activity_yt_video_time_elapsed_capped_total_mainstream_all  ~  rr_cts + jw_cts + fem_cts + age + gender + educ2 + race + pid_lean
  )
f_visit_pid_alternative_all <-
  formula(
    activity_yt_n_video_alternative_all ~  rr_cts + jw_cts + fem_cts + age + gender + educ2 + race + pid_lean
  )
f_visit_pid_extremist_all <-
  formula(
    activity_yt_n_video_extremist_all ~  rr_cts + jw_cts + fem_cts + age + gender + educ2 + race + pid_lean
  )
f_visit_pid_mainstream_all <-
  formula(
    activity_yt_n_video_mainstream_all  ~  rr_cts + jw_cts + fem_cts + age + gender + educ2 + race  + pid_lean
  )

# (FIRE ) with different rr measure (rr_over_mean_cces)
f_time_fire_alternative_all <-
  formula(
    activity_yt_video_time_elapsed_capped_total_alternative_all ~  rr_over_mean_cces + jw_cts + fem_cts + age + gender + educ2 + race
  )
f_time_fire_extremist_all <-
  formula(
    activity_yt_video_time_elapsed_capped_total_extremist_all  ~  rr_over_mean_cces + jw_cts + fem_cts + age + gender + educ2 + race
  )
f_time_fire_mainstream_all <-
  formula(
    activity_yt_video_time_elapsed_capped_total_mainstream_all  ~  rr_over_mean_cces + jw_cts + fem_cts + age + gender + educ2 + race
  )
f_visit_fire_alternative_all <-
  formula(
    activity_yt_n_video_alternative_all ~  rr_over_mean_cces + jw_cts + fem_cts + age + gender + educ2 + race
  )
f_visit_fire_extremist_all <-
  formula(activity_yt_n_video_extremist_all ~  rr_over_mean_cces + jw_cts + fem_cts + age + gender + educ2 + race)

f_visit_fire_mainstream_all <-
  formula(
    activity_yt_n_video_mainstream_all  ~  rr_over_mean_cces + jw_cts + fem_cts + age + gender + educ2 + race
  )


#FIRE
time_fire_fs <- list(
  f_time_fire_alternative_all,
  f_time_fire_extremist_all,
  f_time_fire_mainstream_all
)
visit_fire_fs <- list(
  f_visit_fire_alternative_all,
  f_visit_fire_extremist_all,
  f_visit_fire_mainstream_all
)

#PID 
time_pid_fs <- list(
  f_time_pid_alternative_all,
  f_time_pid_extremist_all,
  f_time_pid_mainstream_all
)
visit_pid_fs <- list(
  f_visit_pid_alternative_all,
  f_visit_pid_extremist_all,
  f_visit_pid_mainstream_all
)

robust_weighted_quasipoisson <- function(data = activity_data, 
                                         formula,
                                         robust_output = TRUE) {
  fit <- glm(formula, 
             family = quasipoisson(link = "log"),
             offset = log(week),
             data = activity_data,
             weights = weight_cmd) 
  
  results <- fit
  
  if (robust_output == TRUE) {
    cov_mod <- vcovHC(fit, type = "HC0")
    std_err <- sqrt(diag(cov_mod))
    q_val <- qnorm(0.975) # stick to 95% CI
    
    results <- bind_cols(
      predictor = fit$coefficients %>% names(),
      estimate = coef(fit),
      robust_se = std_err,
      z = (coef(fit) / std_err),
      p_val = 1.96 * pnorm(abs(coef(fit) / std_err), lower.tail = FALSE),
      ci_lwr = coef(fit) - q_val  * std_err,
      ci_upr = coef(fit) + q_val  * std_err,
    ) %>%
      mutate(
        stat_sig = if_else(p_val < .05, "*", ""),
        channel_type = str_extract(as.character(formula)[2], "alternative|extremist|mainstream")
      ) 
  }
  return(results)
}
```

``` r
QP_visit_fire_fit <- list()
QP_time_fire_fit <- list()
QP_visit_pid_fit <- list()
QP_time_pid_fit <- list()

for (i in 1:length(visit_fire_fs)) {
  QP_visit_fire_fit[[i]] <-
    robust_weighted_quasipoisson(formula = visit_fire_fs[[i]])
  QP_time_fire_fit[[i]] <-
    robust_weighted_quasipoisson(formula = time_fire_fs[[i]])
  QP_visit_pid_fit[[i]] <-
    robust_weighted_quasipoisson(formula = visit_pid_fs[[i]])
  QP_time_pid_fit[[i]] <-
    robust_weighted_quasipoisson(formula = time_pid_fs[[i]])
}

names(QP_time_fire_fit) <-
  c("time_fire_alternative_full",
    "time_fire_extremist_full",
    "time_fire_mainstream_full")
names(QP_visit_fire_fit) <-
  c("visit_fire_alternative_full",
    "visit_fire_extremist_full",
    "visit_fire_mainstream_full")
names(QP_time_pid_fit) <-
  c("time_pid_alternative_full",
    "time_pid_extremist_full",
    "time_pid_mainstream_full")
names(QP_visit_pid_fit) <-
  c("visit_pid_alternative_full",
    "visit_pid_extremist_full",
    "visit_pid_mainstream_full")


coef_names <- c("Intercept",
                "Denial of racism",
                "Feeling Jews",
                "Hostile sexism",
                "Age",
                "Male",
                "Some college",
                "Bachelor's degree",
                "Post-grad",
                "Non-white")
```

``` r
visit_pid_models <- bind_rows(QP_visit_pid_fit)
pid_visit_plot <- coef_plot(data = visit_pid_models) 

time_pid_models <- bind_rows(QP_time_pid_fit)
pid_time_plot <- coef_plot(data = time_pid_models) 

pid_visit_plot + pid_time_plot + plot_layout(ncol = 1)
```

<img src="appendix-figures_files/figure-gfm/PID visit and time plot-1.png" style="display: block; margin: auto;" />

``` r
ggsave('./appendix-figures_files/qpois_coefficients_pid.png',
      dpi = 600, width = 20, height = 11)
```

``` r
# separately estimate without robust SEs becuase tex reg doesn't like this... :L
QP_visit_pid_fit_nonrob <- QP_time_pid_fit_nonrob <- list()
for (i in 1:length(visit_pid_fs)) {
  QP_visit_pid_fit_nonrob[[i]] <-
    robust_weighted_quasipoisson(formula = visit_pid_fs[[i]],
                                 robust_output = F)
  QP_time_pid_fit_nonrob[[i]] <-
    robust_weighted_quasipoisson(formula = time_pid_fs[[i]],
                                 robust_output = F)
}
robust_SEs <- bind_rows(visit_pid_models, time_pid_models)$robust_se
robust_pvals<- bind_rows(visit_pid_models, time_pid_models)$p_val
texreg(c(QP_visit_pid_fit_nonrob, QP_time_pid_fit_nonrob),
       digits = 2,
       booktabs = TRUE, 
       label = "tab:figa7table",
       caption = "Quasipoisson coefficients for correlates of of views and time per week spent on videos from alternative, extremist, and mainstream media channels. Robust standard errors are in parentheses.",
       # texreg reads models as separate lists
       # manually adding in robust p-vals
       override.pvalues = list(robust_pvals[1:12],
                    robust_pvals[13:24],
                    robust_pvals[25:36],
                    robust_pvals[37:48],
                    robust_pvals[49:60],
                    robust_pvals[61:72]),
       override.se = list(robust_SEs[1:12],
                    robust_SEs[13:24],
                    robust_SEs[25:36],
                    robust_SEs[37:48],
                    robust_SEs[49:60],
                    robust_SEs[61:72]),
       custom.model.names = rep(c("Alternative",
                                 "Extremist",
                                 "Mainstream"),2),
       custom.coef.map =
         list(
            "fem_cts" = "Hostile sexism",
            "rr_cts" = "Racial resentment",
            "jw_cts" = "Feeling Jews",
            "pid_leanDemocrat" = "Democrat",
            "pid_leanRepublican" = "Republican",
            "age" = "Age",
            "genderMale" =  "Male",
            "raceNon-white" = "Non-white",
            "educ2Some college" = "Some college",
            "educ24-year" = "Bachelor's degree",
            "educ2Post-grad" = "Post-grad",
            "(Intercept)" = "Intercept"
         ),
          file = "appendix-figures_files/figureA7.tex")
```

### Tables S6 and S7: Separating Hostile sexism and Racial resentment specifications

``` r
# without hostile sexism
f_time_pid_alternative_rr <-
  formula(
    activity_yt_video_time_elapsed_capped_total_alternative_all ~  rr_cts + jw_cts + age + gender + educ2 + race 
  )
f_time_pid_extremist_rr <-
  formula(
    activity_yt_video_time_elapsed_capped_total_extremist_all  ~  rr_cts + jw_cts + age + gender + educ2 + race 
  )
f_time_pid_mainstream_rr <-
  formula(
    activity_yt_video_time_elapsed_capped_total_mainstream_all  ~  rr_cts + jw_cts + age + gender + educ2 + race 
  )
f_visit_pid_alternative_rr <-
  formula(
    activity_yt_n_video_alternative_all ~  rr_cts + jw_cts  + age + gender + educ2 + race
  )
f_visit_pid_extremist_rr <-
  formula(activity_yt_n_video_extremist_all ~  rr_cts + jw_cts  + age + gender + educ2 + race)

f_visit_pid_mainstream_rr <-
  formula(
    activity_yt_n_video_mainstream_all  ~  rr_cts + jw_cts + age + gender + educ2 + race
  )


# without racial resentment
f_time_pid_alternative_hs <-
  formula(
    activity_yt_video_time_elapsed_capped_total_alternative_all ~  fem_cts + jw_cts + age + gender + educ2 + race 
  )
f_time_pid_extremist_hs <-
  formula(
    activity_yt_video_time_elapsed_capped_total_extremist_all  ~  fem_cts + jw_cts + age + gender + educ2 + race 
  )
f_time_pid_mainstream_hs <-
  formula(
    activity_yt_video_time_elapsed_capped_total_mainstream_all  ~  fem_cts + jw_cts + age + gender + educ2 + race 
  )

f_visit_pid_alternative_hs <-
  formula(
    activity_yt_n_video_alternative_all ~  fem_cts + jw_cts + age + gender + educ2 + race 
  )
f_visit_pid_extremist_hs <-
  formula(
    activity_yt_n_video_extremist_all  ~  fem_cts + jw_cts + age + gender + educ2 + race 
  )
f_visit_pid_mainstream_hs <-
  formula(
    activity_yt_n_video_mainstream_all  ~  fem_cts + jw_cts + age + gender + educ2 + race 
  )


time_rr_fs <- list(
  f_time_pid_alternative_rr,
  f_time_pid_extremist_rr,
  f_time_pid_mainstream_rr
)
visit_rr_fs <- list(
  f_visit_pid_alternative_rr,
  f_visit_pid_extremist_rr,
  f_visit_pid_mainstream_rr
)
time_hs_fs <- list(
  f_time_pid_alternative_hs,
  f_time_pid_extremist_hs,
  f_time_pid_mainstream_hs
)
visit_hs_fs <- list(
  f_visit_pid_alternative_hs,
  f_visit_pid_extremist_hs,
  f_visit_pid_mainstream_hs
)


QP_time_rr_fit <- list()
QP_time_hs_fit <- list()
QP_visit_rr_fit <- list()
QP_visit_hs_fit <- list()

for (i in 1:length(time_rr_fs)) {
  QP_time_rr_fit[[i]] <-
    robust_weighted_quasipoisson(formula = time_rr_fs[[i]])
  QP_time_hs_fit[[i]] <-
    robust_weighted_quasipoisson(formula = time_hs_fs[[i]])
  QP_visit_rr_fit[[i]] <-
    robust_weighted_quasipoisson(formula = visit_rr_fs[[i]])
  QP_visit_hs_fit[[i]] <-
    robust_weighted_quasipoisson(formula = visit_hs_fs[[i]])
}

names(QP_time_rr_fit) <-
  c("time_rr_alternative_full",
    "time_rr_extremist_full",
    "time_rr_mainstream_full")
names(QP_time_hs_fit) <-
  c("time_hs_alternative_full",
    "time_hs_extremist_full",
    "time_hs_mainstream_full")
names(QP_visit_rr_fit) <-
  c("visit_rr_alternative_full",
    "visit_rr_extremist_full",
    "visit_rr_mainstream_full")
names(QP_visit_hs_fit) <-
  c("visit_hs_alternative_full",
    "visit_hs_extremist_full",
    "visit_hs_mainstream_full")
```

``` r
# separately estimate without robust SEs becuase tex reg doesn't like this... :L
QP_time_hs_fit_nonrob <- QP_time_rr_fit_nonrob <- list()
QP_visit_hs_fit_nonrob <- QP_visit_rr_fit_nonrob <- list()
for (i in 1:length(time_hs_fs)) {
  QP_time_hs_fit_nonrob[[i]] <-
    robust_weighted_quasipoisson(formula = time_hs_fs[[i]],
                                 robust_output = F)
  QP_time_rr_fit_nonrob[[i]] <-
    robust_weighted_quasipoisson(formula = time_rr_fs[[i]],
                                 robust_output = F)
  QP_visit_hs_fit_nonrob[[i]] <-
    robust_weighted_quasipoisson(formula = visit_hs_fs[[i]],
                                 robust_output = F)
  QP_visit_rr_fit_nonrob[[i]] <-
    robust_weighted_quasipoisson(formula = visit_rr_fs[[i]],
                                 robust_output = F)
}

time_rr_hs_models <- bind_rows(QP_time_hs_fit[[1]],
                               QP_time_rr_fit[[1]],
                               QP_time_hs_fit[[2]],
                               QP_time_rr_fit[[2]],
                               QP_time_hs_fit[[3]],
                               QP_time_rr_fit[[3]])
robust_SEs <- time_rr_hs_models$robust_se
robust_pvals<- time_rr_hs_models$p_val

# output tex table
texreg(
  list(
    QP_time_hs_fit_nonrob[[1]],
    QP_time_rr_fit_nonrob[[1]],
    QP_time_hs_fit_nonrob[[2]],
    QP_time_rr_fit_nonrob[[2]],
    QP_time_hs_fit_nonrob[[3]],
    QP_time_rr_fit_nonrob[[3]]
  ),
       digits = 2,
       booktabs = TRUE, 
       label = "tab:tablea7.1",
       caption = "Quasipoisson coefficients for correlates of of views and time per week spent on videos from alternative, extremist, and mainstream media channels. Robust standard errors are in parentheses.",
       # texreg reads models as separate lists
       # manually adding in robust p-vals
       override.pvalues = list(robust_pvals[1:9],
                    robust_pvals[10:18],
                    robust_pvals[19:27],
                    robust_pvals[28:36],
                    robust_pvals[37:45],
                    robust_pvals[46:54]),
       override.se = list(robust_SEs[1:9],
                    robust_SEs[10:18],
                    robust_SEs[19:27],
                    robust_SEs[28:36],
                    robust_SEs[37:45],
                    robust_SEs[46:54]),
       custom.model.names = rep(c("Alternative",
                                 "Extremist",
                                 "Mainstream"), each = 2),
       custom.coef.map =
         list(
            "fem_cts" = "Hostile sexism",
            "rr_cts" = "Racial resentment",
            "jw_cts" = "Feeling Jews",
            #"pid_leanDemocrat" = "Democrat",
            #"pid_leanRepublican" = "Republican",
            "age" = "Age",
            "genderMale" =  "Male",
            "raceNon-white" = "Non-white",
            "educ2Some college" = "Some college",
            "educ24-year" = "Bachelor's degree",
            "educ2Post-grad" = "Post-grad",
            "(Intercept)" = "Intercept"
         ),
          file = "appendix-figures_files/additional_regressions_hs_rr_time.tex")


visit_rr_hs_models <- bind_rows(QP_visit_hs_fit[[1]],
                               QP_visit_rr_fit[[1]],
                               QP_visit_hs_fit[[2]],
                               QP_visit_rr_fit[[2]],
                               QP_visit_hs_fit[[3]],
                               QP_visit_rr_fit[[3]])
robust_SEs <- visit_rr_hs_models$robust_se
robust_pvals<- visit_rr_hs_models$p_val

# output tex table
texreg(
  list(
    QP_visit_hs_fit_nonrob[[1]],
    QP_visit_rr_fit_nonrob[[1]],
    QP_visit_hs_fit_nonrob[[2]],
    QP_visit_rr_fit_nonrob[[2]],
    QP_visit_hs_fit_nonrob[[3]],
    QP_visit_rr_fit_nonrob[[3]]
  ),
       digits = 2,
       booktabs = TRUE, 
       label = "tab:tablea7.2",
       caption = "Quasipoisson coefficients for correlates of of views and time per week spent on videos from alternative, extremist, and mainstream media channels. Robust standard errors are in parentheses.",
       # texreg reads models as separate lists
       # manually adding in robust SEs
       override.pvalues = list(robust_pvals[1:9],
                    robust_pvals[10:18],
                    robust_pvals[19:27],
                    robust_pvals[28:36],
                    robust_pvals[37:45],
                    robust_pvals[46:54]),
       override.se = list(robust_SEs[1:9],
                    robust_SEs[10:18],
                    robust_SEs[19:27],
                    robust_SEs[28:36],
                    robust_SEs[37:45],
                    robust_SEs[46:54]),
       custom.model.names = rep(c("Alternative",
                                 "Extremist",
                                 "Mainstream"), each = 2),
       custom.coef.map =
         list(
            "fem_cts" = "Hostile sexism",
            "rr_cts" = "Racial resentment",
            "jw_cts" = "Feeling Jews",
            "age" = "Age",
            "genderMale" =  "Male",
            "raceNon-white" = "Non-white",
            "educ2Some college" = "Some college",
            "educ24-year" = "Bachelor's degree",
            "educ2Post-grad" = "Post-grad",
            "(Intercept)" = "Intercept"
         ),
          file = "appendix-figures_files/additional_regressions_hs_rr_views.tex")
```

### Figure S11 and Table S8: Correlates of exposure to YouTube videos by channel type (alternate racial attitude measure)

``` r
visit_fire_models <- bind_rows(QP_visit_fire_fit)
fire_visit_plot <- coef_plot(data = visit_fire_models)

time_fire_models <- bind_rows(QP_time_fire_fit)
fire_time_plot <- coef_plot(data = time_fire_models)
```

``` r
# separately estimate without robust SEs becuase tex reg doesn't like this... :L
QP_visit_fire_fit_nonrob <- QP_time_fire_fit_nonrob <- list()
for (i in 1:length(time_fire_fs)) {
  QP_visit_fire_fit_nonrob[[i]] <-
    robust_weighted_quasipoisson(formula = visit_fire_fs[[i]],
                                 robust_output = F)
  QP_time_fire_fit_nonrob[[i]] <-
    robust_weighted_quasipoisson(formula = time_fire_fs[[i]],
                                 robust_output = F)
}
robust_SEs <- bind_rows(visit_fire_models, time_fire_models)$robust_se
robust_pvals <- bind_rows(visit_fire_models, time_fire_models)$p_val

texreg(c(QP_visit_fire_fit_nonrob, QP_time_fire_fit_nonrob),
       digits = 2,
       booktabs = TRUE, 
       label = "tab:figa8table",
       caption = "Quasipoisson coefficients for correlates of of views and time per week spent on videos from alternative, extremist, and mainstream media channels. Robust standard errors are in parentheses.",
       # texreg reads models as separate lists
       # manually adding in robust SEs
       override.pvalues =  list(robust_pvals[1:10],
                    robust_pvals[11:20],
                    robust_pvals[21:30],
                    robust_pvals[31:40],
                    robust_pvals[41:50],
                    robust_pvals[51:60]),
       override.se =  list(robust_SEs[1:10],
                    robust_SEs[11:20],
                    robust_SEs[21:30],
                    robust_SEs[31:40],
                    robust_SEs[41:50],
                    robust_SEs[51:60]),
       custom.model.names = rep(c("Alternative",
                                 "Extremist",
                                 "Mainstream"),2),
       custom.coef.map =
         list(
            "fem_cts" = "Hostile sexism",
            "rr_over_mean_cces" = "Denial of racism",
            "jw_cts" = "Feeling Jews",
            "age" = "Age",
            "genderMale" =  "Male",
            "raceNon-white" = "Non-white",
            "educ2Some college" = "Some college",
            "educ24-year" = "Bachelor's degree",
            "educ2Post-grad" = "Post-grad",
            "(Intercept)" = "Intercept"
         ),
          file = "appendix-figures_files/figureA8.tex")
```

### Factor loadings

``` r
rr_cces_vars <- activity_data %>%
  select(all_of(c("CC18_422e", "CC18_422f", "CC18_422g", "CC18_422h"))) #classical RR
fire_cces_vars <- activity_data %>%
  select(all_of(c("CC18_422e", "CC18_422f", "CC18_422g", "CC18_422h")),
         all_of(c("CC18_422a", "CC18_422b"))) # overstate 
fem_cces_vars <- activity_data %>%
  select(all_of(c("CC18_422c", "CC18_422d"))) #fem
all_cces_vars <- activity_data  %>%
  select(starts_with(c("CC18_422")))

# pca with varimax rotation
rr_pca_rot <- psych::principal(rr_cces_vars,
                 nfactors = 1, 
                 rotate = 'varimax')
rr_pca_rot
```

    ## Principal Components Analysis
    ## Call: psych::principal(r = rr_cces_vars, nfactors = 1, rotate = "varimax")
    ## Standardized loadings (pattern matrix) based upon correlation matrix
    ##            PC1   h2   u2 com
    ## CC18_422e 0.95 0.90 0.10   1
    ## CC18_422f 0.94 0.88 0.12   1
    ## CC18_422g 0.94 0.88 0.12   1
    ## CC18_422h 0.94 0.89 0.11   1
    ## 
    ##                 PC1
    ## SS loadings    3.55
    ## Proportion Var 0.89
    ## 
    ## Mean item complexity =  1
    ## Test of the hypothesis that 1 component is sufficient.
    ## 
    ## The root mean square of the residuals (RMSR) is  0.04 
    ##  with the empirical chi square  24.12  with prob <  5.8e-06 
    ## 
    ## Fit based upon off diagonal values = 1

``` r
fire_pca_rot <- psych::principal(fire_cces_vars,
                 nfactors = 1, 
                 rotate = 'varimax')
fire_pca_rot
```

    ## Principal Components Analysis
    ## Call: psych::principal(r = fire_cces_vars, nfactors = 1, rotate = "varimax")
    ## Standardized loadings (pattern matrix) based upon correlation matrix
    ##            PC1   h2   u2 com
    ## CC18_422e 0.93 0.87 0.13   1
    ## CC18_422f 0.93 0.87 0.13   1
    ## CC18_422g 0.93 0.86 0.14   1
    ## CC18_422h 0.93 0.87 0.13   1
    ## CC18_422a 0.91 0.84 0.16   1
    ## CC18_422b 0.83 0.69 0.31   1
    ## 
    ##                 PC1
    ## SS loadings    5.00
    ## Proportion Var 0.83
    ## 
    ## Mean item complexity =  1
    ## Test of the hypothesis that 1 component is sufficient.
    ## 
    ## The root mean square of the residuals (RMSR) is  0.04 
    ##  with the empirical chi square  57.03  with prob <  5e-09 
    ## 
    ## Fit based upon off diagonal values = 1

``` r
fem_pca_rot <- psych::principal(fem_cces_vars,
                 nfactors = 1, 
                 rotate = 'varimax')
fem_pca_rot
```

    ## Principal Components Analysis
    ## Call: psych::principal(r = fem_cces_vars, nfactors = 1, rotate = "varimax")
    ## Standardized loadings (pattern matrix) based upon correlation matrix
    ##            PC1   h2   u2 com
    ## CC18_422c 0.92 0.85 0.15   1
    ## CC18_422d 0.92 0.85 0.15   1
    ## 
    ##                 PC1
    ## SS loadings    1.70
    ## Proportion Var 0.85
    ## 
    ## Mean item complexity =  1
    ## Test of the hypothesis that 1 component is sufficient.
    ## 
    ## The root mean square of the residuals (RMSR) is  0.15 
    ##  with the empirical chi square  54.39  with prob <  NA 
    ## 
    ## Fit based upon off diagonal values = 0.95

``` r
all_pca_rot <- psych::principal(all_cces_vars,
                 nfactors = 2, 
                 rotate = 'varimax')
all_pca_rot
```

    ## Principal Components Analysis
    ## Call: psych::principal(r = all_cces_vars, nfactors = 2, rotate = "varimax")
    ## Standardized loadings (pattern matrix) based upon correlation matrix
    ##            RC1  RC2   h2   u2 com
    ## CC18_422a 0.81 0.43 0.84 0.16 1.5
    ## CC18_422b 0.77 0.35 0.72 0.28 1.4
    ## CC18_422c 0.40 0.84 0.86 0.14 1.4
    ## CC18_422d 0.44 0.79 0.82 0.18 1.6
    ## CC18_422e 0.73 0.58 0.87 0.13 1.9
    ## CC18_422f 0.83 0.43 0.87 0.13 1.5
    ## CC18_422g 0.79 0.49 0.86 0.14 1.7
    ## CC18_422h 0.77 0.52 0.86 0.14 1.8
    ## 
    ##                        RC1  RC2
    ## SS loadings           4.04 2.67
    ## Proportion Var        0.50 0.33
    ## Cumulative Var        0.50 0.84
    ## Proportion Explained  0.60 0.40
    ## Cumulative Proportion 0.60 1.00
    ## 
    ## Mean item complexity =  1.6
    ## Test of the hypothesis that 2 components are sufficient.
    ## 
    ## The root mean square of the residuals (RMSR) is  0.04 
    ##  with the empirical chi square  122.33  with prob <  7e-20 
    ## 
    ## Fit based upon off diagonal values = 1

``` r
# psych::fa.parallel(all_cces_vars,  fa="pc", n.iter=100,
#             show.legend=FALSE, main="Scree plot with parallel analysis")

#plot all PCs
AMR::ggplot_pca(prcomp(all_cces_vars[complete.cases(all_cces_vars),]))
```

<img src="appendix-figures_files/figure-gfm/unnamed-chunk-7-1.png" style="display: block; margin: auto;" />

### Correlation between racial and gender resentment

``` r
# n is 875
cor(activity_data$fem_cts, activity_data$rr_cts, 
    use = "complete.obs",
    method = "pearson")
```

    ## [1] 0.8356406

### Model with combined score

``` r
# combine racial+gender resentment variable
activity_data <- activity_data %>%
  rowwise() %>%
  mutate(rr_fem_cts = mean(c(rr_cts,fem_cts),na.rm=T ) ) %>%
  ungroup()


f_time_rrfem_alternative <-
  formula(
    activity_yt_video_time_elapsed_capped_total_alternative_all ~  rr_fem_cts + jw_cts + age + gender + educ2 + race
  )
f_time_rrfem_extremist <-
  formula(
    activity_yt_video_time_elapsed_capped_total_extremist_all  ~  rr_fem_cts + jw_cts + age + gender + educ2 + race 
  )
f_time_rrfem_mainstream <-
  formula(
    activity_yt_video_time_elapsed_capped_total_mainstream_all  ~  rr_fem_cts + jw_cts + age + gender + educ2 + race 
  )

f_visit_rrfem_alternative <-
  formula(
    activity_yt_n_video_alternative_all ~  rr_fem_cts + jw_cts + age + gender + educ2 + race
  )
f_visit_rrfem_extremist <-
  formula(
    activity_yt_n_video_extremist_all ~  rr_fem_cts + jw_cts + age + gender + educ2 + race 
  )
f_visit_rrfem_mainstream <-
  formula(
    activity_yt_n_video_mainstream_all  ~  rr_fem_cts + jw_cts + age + gender + educ2 + race  
  )

time_rrfem_fs <- list(
  f_time_rrfem_alternative,
  f_time_rrfem_extremist,
  f_time_rrfem_mainstream
)
visit_rrfem_fs <- list(
  f_visit_rrfem_alternative,
  f_visit_rrfem_extremist,
  f_visit_rrfem_mainstream
)

# estimate with and without robust SEs
QP_time_rrfem_fit <- QP_visit_rrfem_fit <- list()
QP_time_rrfem_fit_norob <- QP_visit_rrfem_fit_norob <- list()
for (i in 1:length(time_rrfem_fs)) {
  QP_time_rrfem_fit[[i]] <-
    robust_weighted_quasipoisson(formula = time_rrfem_fs[[i]])
  QP_time_rrfem_fit_norob[[i]] <-
    robust_weighted_quasipoisson(formula = time_rrfem_fs[[i]],
                                 robust_output = F)
  QP_visit_rrfem_fit[[i]] <-
    robust_weighted_quasipoisson(formula = visit_rrfem_fs[[i]])
  QP_visit_rrfem_fit_norob[[i]] <-
    robust_weighted_quasipoisson(formula = visit_rrfem_fs[[i]],
                                 robust_output = F)
}
robust_SEs <- bind_rows(c(QP_visit_rrfem_fit, QP_time_rrfem_fit) )$robust_se
robust_pvals <- bind_rows(c(QP_visit_rrfem_fit, QP_time_rrfem_fit) )$p_val

texreg(c(QP_visit_rrfem_fit_norob, QP_time_rrfem_fit_norob),
       digits = 2,
       booktabs = TRUE, 
       label = "tab:figaxtable",
       caption = "Quasipoisson coefficients for correlates of of views and time per week spent on videos from alternative, extremist, and mainstream media channels. Robust standard errors are in parentheses.",
       # texreg reads models as separate lists
       override.pvalues =  list(robust_pvals[1:9],
                    robust_pvals[10:18],
                    robust_pvals[19:27],
                    robust_pvals[28:36],
                    robust_pvals[37:45],
                    robust_pvals[46:54]),
       override.se =  list(robust_SEs[1:9],
                    robust_SEs[10:18],
                    robust_SEs[19:27],
                    robust_SEs[28:36],
                    robust_SEs[37:45],
                    robust_SEs[46:54]),
       custom.model.names = rep(c("Alternative",
                                 "Extremist",
                                 "Mainstream"),2),
       custom.coef.map =
         list(
            "rr_fem_cts" = "Combined racial resentment and hosile sexism",
            "jw_cts" = "Feeling Jews",
            "age" = "Age",
            "genderMale" =  "Male",
            "raceNon-white" = "Non-white",
            "educ2Some college" = "Some college",
            "educ24-year" = "Bachelor's degree",
            "educ2Post-grad" = "Post-grad",
            "pid_leanDemocrat" = "Democrat",
            "pid_leanRepublican" = "Republican",
            "(Intercept)" = "Intercept"
         ),
          file = "appendix-figures_files/additional_regressions_combined_hs_rr.tex")
```

## Figure S8 and Table S3: Zero-inflated models on correlates of time on YouTube video by channel type

``` r
# make floats integers for pscl
activity_data <- activity_data %>%
  mutate(
    minutes_activity_yt_video_time_elapsed_capped_total_alternative_all = round(
      minutes_activity_yt_video_time_elapsed_capped_total_alternative_all,
      0
    ),
    minutes_activity_yt_video_time_elapsed_capped_total_extremist_all = round(
      minutes_activity_yt_video_time_elapsed_capped_total_extremist_all,
      0
    ),
    minutes_activity_yt_video_time_elapsed_capped_total_mainstream_all = round(
      minutes_activity_yt_video_time_elapsed_capped_total_mainstream_all,
      0
    )
  )

# formulas
zif_time_alternative_all <-
  formula(
    minutes_activity_yt_video_time_elapsed_capped_total_alternative_all ~  rr_cts + jw_cts + fem_cts + age + gender + educ2 + race |
      rr_cts + jw_cts + fem_cts + age + gender + educ2 + race
  )
zif_time_extremist_all <-
  formula(
    minutes_activity_yt_video_time_elapsed_capped_total_extremist_all  ~  rr_cts + jw_cts + fem_cts + age + gender + educ2 + race |
      rr_cts + jw_cts + fem_cts + age + gender + educ2 + race
  )
zif_time_mainstream_all <-
  formula(
    minutes_activity_yt_video_time_elapsed_capped_total_mainstream_all  ~  rr_cts + jw_cts + fem_cts + age + gender + educ2 + race |
      rr_cts + jw_cts + fem_cts + age + gender + educ2 + race
  )

#toutes
zi_time_fs <- list(
  zif_time_alternative_all,
  zif_time_extremist_all,
  zif_time_mainstream_all
)

ZIP_time_fit <- list()
for (i in 1:length(zi_time_fs)) {
  ZIP_time_fit[[i]] <- pscl::zeroinfl(
    zi_time_fs[[i]],
    data = activity_data,
    weights = weight_cmd,
    offset = log(week),
    dist = "poisson"
  )
  #print(summary(ZIP_time_fit[[i]]))
}

names(ZIP_time_fit) <-
  c("time_alternative_full",
    "time_extremist_full",
    "time_mainstream_full")
```

``` r
time_plot_labels <- c(
  "alternative" = str_wrap("Minutes/week on alternative channel videos", width = 30),
  "extremist" = str_wrap("Minutes/week on extremist channel videos", width = 30),
  "mainstream" = str_wrap("Minutes/week on mainstream media channel videos", width = 30)
)
robust_estimates_fxn <- function(model) {
  result <- lmtest::coeftest(model, vcov = sandwich(model))
  CIs <- confint(result) # honestly probably better to bootstrap, but w/e just be consistent
  ci_lwr <- CIs[,1]
  ci_upr <- CIs[,2]
  stderrs <- result[,2]
  pvals <- result[,4]
  
  out <- tibble(ci_lwr, 
                ci_upr,
                robust_se = stderrs,
                pval = pvals,
                estimate = result[,1],
                covar = row.names(result)
                ) %>% 
    separate(covar, 
             into = c("component", "predictor"), 
             sep = "_", 
             extra = "merge")
  return( out)
}

zip_coef_plot <- function(coef_df) {
  
  if(unique(coef_df$component) %in% "zero") {
    x_limit <-  c(-3.5, 3)
     x_lab <- "Logit coefficient"
  } else {
    x_limit <- c(-5, 5)
     x_lab <- "Poisson coefficient"
  }
  if( str_detect(deparse(substitute(coef_df)), "visit") ){
    facet_lab <- visit_plot_labels
  } else facet_lab <- time_plot_labels
  
  coef_df %>% 
  ggplot(aes(x = estimate, y = str_wrap_factor(predictor, width = 12) )) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_linerange(aes(xmin = ci_lwr, xmax = ci_upr,
                     color = channel_type),
                 lwd = 8, 
             show.legend = FALSE) +
  geom_point(size = 10, shape = 21, stroke = 1,
             color = "black", fill = "#FFFFFF") +
  geom_text(aes(label = format(round(estimate, 2),nsmall = 2)) ,
            size = 3.5, color = "black") +
  geom_text(aes(label = stat_sig ),
            size = 8, nudge_x = .5, nudge_y = .25) +
  facet_wrap( ~ channel_type,
              labeller = as_labeller(
               facet_lab
              )) +
  scale_color_manual(values = c("alternative" = "#FFA500", 
                                "extremist" = "#CD5C5C", 
                                "mainstream" = "#015CB9")) +
  labs(y = "", 
       x = x_lab
       ) +
  scale_x_continuous(limits = x_limit) +
  theme_bw() + 
  theme(strip.text = element_text(size = 12, hjust = .5),
        strip.background = element_rect(fill = "grey", 
                                        color = "grey"),
        axis.text.y = element_text(size = 12))
}

ZIP_time_df <- map_dfr(1:3, ~robust_estimates_fxn(ZIP_time_fit[[.x]])) %>% 
  mutate(channel_type = c(
    rep("alternative", 20),
    rep("extremist", 20),
    rep("mainstream", 20)
  ),
  stat_sig = case_when(estimate > 0 & ci_lwr > 0 ~ "*",
                              estimate < 0 & ci_upr < 0 ~ "*",
                              TRUE ~ "")) 

ZIP_time_count_df <- ZIP_time_df %>% 
  filter(component == "count")  %>% 
  filter(predictor != "(Intercept)") %>% 
  mutate(predictor = refactor_fxn(recode_fxn(predictor)))
ZIP_time_zero_df <- ZIP_time_df %>% 
  filter(component == "zero") %>% 
  filter(predictor != "(Intercept)") %>% 
  mutate(predictor = refactor_fxn(recode_fxn(predictor)))



ZIP_time_count_plot <- zip_coef_plot(ZIP_time_count_df)
ZIP_time_zero_plot <- zip_coef_plot(ZIP_time_zero_df)

plot_grid(
  ggplot(),
  ZIP_time_zero_plot,
  ggplot(),
  ZIP_time_count_plot,
  nrow = 4,
  rel_heights = c(.1, 1, .1, 1),
  vjust = 3, 
  labels = c("A: Zero component", "", "B: Count component", "")
)
```

<img src="appendix-figures_files/figure-gfm/zip plot-1.png" style="display: block; margin: auto;" />

``` r
ggsave("appendix-figures_files/zip_coefficient_time.png",
       height = 14, width = 11 )
```

``` r
texreg(ZIP_time_fit,
       digits = 2,
       booktabs = TRUE, 
       label = "tab:figa5table",
       caption = "Zero-inflated Poisson coefficients for correlates of the time per week spent on videos from alternative, extremist, and mainstream media channels. Robust standard errors are in parentheses.",
       # texreg reads models as separate lists
       override.pvalues = list(ZIP_time_df$pval[1:20],
                               ZIP_time_df$pval[21:40],
                               ZIP_time_df$pval[41:60]),
       override.se = list(ZIP_time_df$robust_se[1:20],
                               ZIP_time_df$robust_se[21:40],
                               ZIP_time_df$robust_se[41:60]),
       custom.model.names = c("Alternative",
                              "Extremist",
                              "Mainstream"),
       custom.coef.map =
         list(
           "Count model: (Intercept)" = "Intercept",
           "Count model: rr_cts" = "Racial resentment",
           "Count model: jw_cts" = "Feeling Jews",
           "Count model: fem_cts" = "Hostile sexism",
           "Count model: age" = "Age",
           "Count model: genderMale" = "Male",
           "Count model: educ2Some college" = "Some college",
           "Count model: educ24-year" = "Bachelor's degree",
           "Count model: educ2Post-grad" = "Post-grad",
           "Count model: raceNon-white" = "Non-white",
           "Zero model: (Intercept)" = "Intercept",
           "Zero model: rr_cts" = "Racial resentment",
           "Zero model: jw_cts" = "Feeling Jews",
           "Zero model: fem_cts" = "Hostile sexism",
           "Zero model: age" = "Age",
           "Zero model: genderMale" = "Male",
           "Zero model: educ2Some college" = "Some college",
           "Zero model: educ24-year" = "Bachelor's degree",
           "Zero model: educ2Post-grad" = "Post-grad",
           "Zero model: raceNon-white" = "Non-white"
         ),
       reorder.coef = c(seq(1,20, by=2),
                        seq(2, 20, by=2)),
       file = "appendix-figures_files/figureA5.tex")
```

## Figure S9 and Table S4: Correlates of YouTube video views by channel type

``` r
# DV is view counts
f_visit_alternative_all <- formula(activity_yt_n_video_alternative_all ~  rr_cts + jw_cts + fem_cts + age + gender + educ2 + race )
f_visit_extremist_all <- formula(activity_yt_n_video_extremist_all ~  rr_cts + jw_cts + fem_cts + age + gender + educ2 + race)
f_visit_mainstream_all <- formula(activity_yt_n_video_mainstream_all  ~  rr_cts + jw_cts + fem_cts + age + gender + educ2 + race )  

main_visit_fs <- list(
  f_visit_alternative_all,
  f_visit_extremist_all,
  f_visit_mainstream_all
)

QP_visit_fit <- list()
for (i in 1:length(main_visit_fs)) {
  QP_visit_fit[[i]] <-
    robust_weighted_quasipoisson(
      formula = main_visit_fs[[i]]
    )
  #print(QP_visit_fit[[i]])
}
names(QP_visit_fit) <- 
  c(
    "visit_alternative_full",
    "visit_extremist_full",
    "visit_mainstream_full"
  )

coef_names <- c("Intercept",
                "Racial resentment",
                "Feeling Jews",
                "Hostile sexism",
                "Age",
                "Male",
                "Some college",
                "Bachelor's degree",
                "Post-grad",
                "Non-white")

visit_models <- bind_rows(QP_visit_fit)
```

``` r
visit_plot_labels <- c(
  "alternative" = str_wrap("Views/week to alternative channel videos", width = 30),
  "extremist" = str_wrap("Views/week to extremist channel videos", width = 30),
  "mainstream" = str_wrap("Views/week to mainstream media channel videos", width = 30)
)

visit_models %>% 
  filter(predictor != "(Intercept)") %>% 
  mutate(predictor = refactor_fxn(recode_fxn(predictor))) %>% 
  ggplot(aes(x = estimate, y = str_wrap_factor(predictor, width = 12) )) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_linerange(aes(xmin = ci_lwr, xmax = ci_upr,
                     color = channel_type),
                 size = 9,
             show.legend = FALSE) +
  geom_point(size = 10, shape = 21, stroke = 1,
             color = "black", fill = "#FFFFFF") +
  geom_text(aes(label = round(estimate, 2)),
            size = 3.5, color = "black") +
  geom_text(aes(label = stat_sig ),
            size = 8, nudge_x = .45, nudge_y = .25) +
  facet_wrap( ~ channel_type,
              labeller = as_labeller(
               visit_plot_labels
              )) +
  scale_color_manual(values = c("alternative" = "#FFA500", 
                                "extremist" = "#CD5C5C", 
                                "mainstream" = "#015CB9")) +
  labs(y = "", x = "Quasipoisson coefficient") +
  theme_bw() + 
  theme(strip.text = element_text(size = 12, hjust = .5),
        strip.background = element_rect(fill="grey", color = "grey"),
        axis.text.y = element_text(size = 12))
```

<img src="appendix-figures_files/figure-gfm/visits plot-1.png" style="display: block; margin: auto;" />

``` r
ggsave("appendix-figures_files/qpois_coefficient_visit.png",
       height = 6, width = 11 )
```

``` r
# separately estimate without robust SEs becuase tex reg doesn't like this... :L
QP_visit_fit_nonrob <- list()
for (i in 1:length(main_visit_fs)) {
  QP_visit_fit_nonrob[[i]] <-
    robust_weighted_quasipoisson(formula = main_visit_fs[[i]],
                                 robust_output = F)
}

robust_SEs <- visit_models$robust_se
robust_pvals<- visit_models$p_val
texreg(QP_visit_fit_nonrob,
       digits = 2,
       booktabs = TRUE, 
       label = "tab:figa6table",
       caption = "Quasipoisson coefficients for correlates of time per week spent on videos from alternative, extremist, and mainstream media channels. Robust standard errors are in parentheses.",
       # texreg reads models as separate lists
       override.pvalues = list(robust_pvals[1:10],
                    robust_pvals[11:20],
                    robust_pvals[21:30]),
       override.se = list(robust_SEs[1:10],
                    robust_SEs[11:20],
                    robust_SEs[21:30]),
       custom.model.names = c("Alternative",
                                 "Extremist",
                                 "Mainstream"),
       custom.coef.map =
         list(
            "fem_cts" = "Hostile sexism",
            "rr_cts" = "Racial resentment",
            "jw_cts" = "Feeling Jews",
            "age" = "Age",
            "genderMale" =  "Male",
            "raceNon-white" = "Non-white",
            "educ2Some college" = "Some college",
            "educ24-year" = "Bachelor's degree",
            "educ2Post-grad" = "Post-grad",
            "(Intercept)" = "Intercept"
         ),
          file = "appendix-figures_files/figureA6.tex")
```

## Figure S12: Correlation between browser history and activity

``` r
### --------- specify a bunch of variables we want to look at
channel_types <- c("alternative",
                   "extremist",
                   "mainstream",
                   "other")

# proportion that watched any alt/ext/msm videos
bh_dummy <- c("bh_alternative_any",
              "bh_extremist_any",
              "bh_mainstream_any",
              "bh_other_any")
at_dummy <- c("at_alternative_any",
              "at_extremist_any",
              "at_mainstream_any",
              "at_other_any")
bh_counts <- paste0("browser_history_yt_n_video_",
                    channel_types,
                    "_all")
bh_time <-
  paste0("browser_history_yt_video_time_elapsed_capped_total_",
         channel_types,
         "_all")
at_counts <- paste0("activity_yt_n_video_",
                    channel_types,
                    "_all")
at_time <- paste0("activity_yt_video_time_elapsed_capped_total_",
                  channel_types,
                  "_all")

# number subscribed to alternative
bh_subscribed_counts <- paste0("browser_history_yt_n_video_",
                               channel_types,
                               "_subscribed")
at_subscribed_counts  <-  paste0("activity_yt_n_video_",
                                 channel_types,
                                 "_subscribed")
# time on subscribed to alternative
bh_subscribed_time <- paste0("browser_history_yt_video_time_elapsed_capped_total_",
                               channel_types,
                               "_subscribed")
at_subscribed_time  <-  paste0("activity_yt_video_time_elapsed_capped_total_",
                                 channel_types,
                                 "_subscribed")


# put activity and browser history together
select_activity_data_data <- activity_data %>%
  select(caseid, at_alt, at_ext, at_msm, at_other, activity_n_total,
         all_of(at_counts),
         all_of(at_time),
         all_of(at_subscribed_counts),
         all_of(at_subscribed_time))
select_browser_history_data <- browser_history_data %>%
  select(caseid, bh_alt, bh_ext, bh_msm, bh_other, browser_history_n_total,
         all_of(bh_counts),
         all_of(bh_time),
         all_of(bh_subscribed_counts),
         all_of(bh_subscribed_time))
bh_at_data <-
  full_join(select_activity_data_data, select_browser_history_data, 
            by = "caseid") %>%
  rename( bh_alternative_any = bh_alt, 
         bh_extremist_any = bh_ext,
         bh_mainstream_any = bh_msm,
         bh_other_any = bh_other,
         at_alternative_any = at_alt,
         at_extremist_any = at_ext,
         at_mainstream_any = at_msm,
         at_other_any = at_other)

correlation_fxn <- function(bh_var, at_var, data = bh_at_data) {
  r = cor(x = data[[bh_var]], 
          y = data[[at_var]], 
          use = "complete.obs", method = "pearson")
  out <- tibble(corr = r,
                channel_type = str_extract(at_var, 
                                           paste0(channel_types, collapse = "|") ),
                metric = case_when(
                  # youtube general
                  str_detect(at_var, "_yt_n_total") ~ "total yt",
                  # youtube channel type
                  str_detect(at_var, "any") ~ "binary",
                  str_detect(at_var, "subscribed") &
                    str_detect(at_var, "yt_n_video") ~ "sub count",
                  str_detect(at_var, "subscribed") &
                    str_detect(at_var, "time") ~ "sub time",
                  str_detect(at_var, "yt_n_video") ~ "count",
                  str_detect(at_var, "time") ~ "time"
                ))
  return(out)
}

correlation_plot <- function(bh_var, at_var, data = merged_data) {
  r = cor(x = data[[bh_var]],
          y = data[[at_var]],
          use = "complete.obs", method = "pearson")
  out <- ggplot(aes(x = data[[bh_var]],
                    y = data[[at_var]]),
                data = data) +
    geom_point() +
    geom_abline()
  return(out)
}
```

``` r
# any visits to channel type
corr_binary <- map2(.x = bh_dummy, .y = at_dummy,
     correlation_fxn) %>% 
  bind_rows()
# number of videos
corr_counts <- map2(.x = bh_counts, .y = at_counts,
     correlation_fxn) %>% 
  bind_rows()
# capped time on videos
corr_time <- map2(.x = bh_time, .y = at_time,
     correlation_fxn) %>% 
  bind_rows()

# number subscribed
corr_subs_count <- map2(.x = bh_subscribed_counts, .y = at_subscribed_counts,
     correlation_fxn) %>% 
  bind_rows()
# time subscribed
corr_subs_time <- map2(.x = bh_subscribed_time, .y = at_subscribed_time,
     correlation_fxn) %>% 
  bind_rows()

# youtube correlations
youtube_corr_df <- corr_binary %>% 
  bind_rows(corr_counts) %>% 
  bind_rows(corr_time) %>% 
  bind_rows(corr_subs_count) %>% 
  bind_rows(corr_subs_time) %>% 
  mutate(channel_type = recode_channel_type_fxn(channel_type),
         metric = factor(metric, levels = c(
           "binary",
           "count", "sub count",
           "time", "sub time"
         )))


youtube_channel_type <- youtube_corr_df %>% 
  ggplot(aes(x = channel_type, 
             y = 1,
             fill = corr)) +
  geom_tile(color = "black") + 
  geom_text(aes(label = format(round(corr, 2), nsmall = 2)),
            color = "white", size = 6) +
  scale_fill_gradient2(low = "#FFFFFF", 
                       mid = "#6F9EBE", 
                       high = "#032146",
                       midpoint = .5,
                      limits = c(0, 1),
                      breaks = seq(0, 1, .2)) +
  coord_fixed(ratio = 1/2) +
  facet_wrap(~metric, nrow = 5,
             labeller = labeller(
               metric = c("binary" = "Any views of YouTube videos from channel",
                          "count" = "Total number of views of YouTube videos from channel",
                          "time" = "Seconds elapsed on YouTube videos from channel (capped)",
                          "sub count" = "Total number of views of subscribed channel",
                          "sub time" = "Seconds elapsed on videos from subscribed channel (capped)")
             )) +
  labs(x = "", y = "") +
  theme(axis.text.y = element_blank(),
        panel.grid = element_blank(),
        legend.position = 'bottom',
        strip.text.x = element_text(size = 12, hjust = 0))+
  guides(fill = guide_colourbar(barwidth = 20,
                                barheight = 1,
                                title = ""))
youtube_channel_type 
```

<img src="appendix-figures_files/figure-gfm/unnamed-chunk-13-1.png" style="display: block; margin: auto;" />

``` r
ggsave("appendix-figures_files/correlations_history_activity_youtube_channel_type.png",
      dpi = 600, height = 8, width = 9 )
```

## Table S9: Differential browsing behavior after install

(CODE ONLY)

``` r
### combine ALT/EXT/TOTAL browser history user-time data 
# must use browser HISTORY data
alt_ext_time <- bh_alt_time %>% 
  pivot_longer(-user_id, names_to = "date") %>% 
  mutate(source = "alternative") %>% 
  bind_rows(bh_ext_time %>% 
              pivot_longer(-user_id, names_to = "date") %>% 
              mutate(source = "extremist") ) %>% 
  right_join(bh_all_time %>%
              pivot_longer(-user_id, names_to = "date", values_to = "total"),
            by = c("user_id", "date")) %>%
  mutate(full_date = as.Date(date),
         year = 2020,
         month = month(full_date),
         week = week(full_date),
         week_date = as.Date(str_c(week, "-1"), format = "%U-%u"),
         fraction = value/total) %>% 
  group_by(week_date, source) %>% 
  mutate(time_per_week = sum(value)) %>% 
  group_by(month, source) %>% 
  mutate(time_per_month = sum(value)) %>% 
  ungroup()

# merge with yougov data
all_alt_ext_time <- alt_ext_time %>% 
  left_join(merged_data, by = "user_id") %>%
  mutate(install_date = ymd(install_date),
         install_diff = full_date - install_date,  # install date difference
         install_diff_biweek = cut(as.numeric(install_diff), 
                                   breaks = seq(-120, 150, by = 15))) 

# make panel data
its_panel_data <- all_alt_ext_time %>%
  mutate(value = if_else(!is.na(total), replace_na(value, 0), value)) %>%
  group_by(full_date, user_id, .drop = FALSE) %>%
  summarize(fraction_time_per_day = value/total,
            weight_cmd, value, total, install_diff) %>%
  ungroup() %>% 
  mutate(
    installed = if_else(install_diff < 0, 0, 1),
    days_after_install = ifelse(install_diff < 0, 0, install_diff)
  ) %>% 
  distinct(full_date, user_id, .keep_all = T) 

# estimate user/day fixed effects OLS
weighted_FE1 <-
  lm(
    fraction_time_per_day ~ installed + factor(user_id) + factor(full_date),
    data = its_panel_data,
    weights = weight_cmd
  )
# same model but with days after install
weighted_FE2 <-
  lm(
    fraction_time_per_day ~ installed + days_after_install + factor(user_id) + factor(full_date),
    data = its_panel_data,
    weights = weight_cmd
  )

# cluster by user
lmtest::coeftest(weighted_FE1, vcov = sandwich::vcovCL(weighted_FE1,
                                                cluster = ~ user_id,
                                                type = "HC1"))
lmtest::coeftest(weighted_FE2, vcov = sandwich::vcovCL(weighted_FE2,
                                                cluster = ~ user_id,
                                                type = "HC1"))
```

## Figure S13: Differences in perceptions of YouTube between full sample and extension sample

``` r
### ====== survey measures for those who did and did not install the extension
yg_youtube <-  merged_data %>% 
  select(user_id, weight_cmd, browser_sample,
         starts_with("youtube"),
         -youtube_vid_quality,
         -youtube_freq_use_3,
         -youtube_freq_use)

youtube_vars <- yg_youtube %>% 
  select(starts_with("youtube")) %>% 
  names()

yg_youtube_factor <- yg_youtube %>% 
  mutate(youtube_vid_trust = case_when(youtube_vid_trust == 1 ~ "A great deal",
                                       youtube_vid_trust == 2 ~ "A moderate amount",
                                       youtube_vid_trust == 3 ~ "Not much",
                                       youtube_vid_trust == 4 ~ "Not at all"),
         youtube_info_accurate = case_when(youtube_info_accurate == 1 ~ "All or almost all",
                                           youtube_info_accurate == 2 ~ "Most",
                                           youtube_info_accurate == 3 ~ "Some",
                                           youtube_info_accurate == 4 ~ "Very little",
                                           youtube_info_accurate == 5 ~ "None at all"),
         youtube_vid_bias = case_when(youtube_vid_bias == 1 ~ "Favor liberals",
                                      youtube_vid_bias == 2 ~ "Favor conservatives",
                                      youtube_vid_bias == 3 ~ "Neither",
                                      youtube_vid_bias == 4 ~ "Other"),
         youtube_personalize = case_when(youtube_personalize == 1 ~ "Very accurate",
                                         youtube_personalize == 2 ~ "Somewhat accurate",
                                         youtube_personalize == 3 ~ "Not very accurate",
                                         youtube_personalize == 4 ~ "Not at all accurate"),
         youtube_vid_trust = factor(youtube_vid_trust, levels = c("A great deal", "A moderate amount", "Not much", "Not at all")),
         youtube_info_accurate = factor(youtube_info_accurate, levels = c("All or almost all", "Most", "Some", "Very little", "None at all")),
         youtube_vid_bias = factor(youtube_vid_bias, levels = c("Favor liberals", "Favor conservatives", "Neither", "Other")),
         youtube_personalize = factor(youtube_personalize, levels = c("Very accurate", "Somewhat accurate", "Not very accurate", "Not at all accurate"))) %>% 
  dummy_cols(select_columns = youtube_vars)




svy_object <- svydesign(ids = ~ 1,
                        weights = ~ weight_cmd,
                        data = yg_youtube_factor)

estimates_df <- map_dfr(youtube_vars,
                        function (x) {
                          svyresults <- svyby(
                            ~ get(x),
                            by = ~ browser_sample,
                            design = svy_object,
                            svymean,
                            na.rm = TRUE
                          )
                          remove_str <- c(paste0("browser:", "get(x)"),
                                          paste0("full:", "get(x)"))
                          # technically, devrais utiliser svyciprop, mais la difference est négligeable ici
                          CIs <- confint(svyresults) 
                          ci_df <- CIs %>% 
                            as_tibble() %>% 
                            mutate(name = rownames(CIs)) %>% 
                            mutate(browser_sample = if_else(str_detect(name, "browser\\:"),
                                                            "browser", "full"),
                                   varlevel = stringr::str_replace_all(name,
                                                                   fixed(remove_str[1]),
                                                                   ""),
                                   varlevel = stringr::str_replace_all(varlevel,
                                                                   fixed(remove_str[2]),
                                                                   "")) %>% 
                            mutate(join_name = paste0(browser_sample,varlevel))%>% 
                            rename(ci_lwr = `2.5 %`,
                                   ci_upr = `97.5 %`) %>% 
                            select(join_name, ci_lwr, ci_upr) 
                          
                          df <- as_tibble(svyresults) %>% 
                            pivot_longer(-browser_sample) %>%
                            mutate(stat = if_else(str_detect(name, "se\\."), "std_err", "prop"),
                                   name = stringr::str_replace_all(name, 
                                                                   fixed("get(x)"), 
                                                      ""),
                                   name = stringr::str_replace_all(name, 
                                                                   fixed("se."), 
                                                                   "")) %>%  
                            pivot_wider(names_from = stat) %>%
                            rename(varlevel = name) %>% 
                            mutate(join_name = paste0(browser_sample,varlevel)) %>% 
                            left_join(ci_df, 
                                      by = "join_name")  %>% 
                            mutate(variable = x)
                          return(df)
                        }) %>% 
  #rename(var_level = name) %>%
  mutate(
    percentage = 100 * prop,
    ci_lwr = 100 * ci_lwr,
    ci_upr = 100 * ci_upr
  )


youtube_attitudes_plot <- function(youtube_var) {
  facet_labs <- c(
    "youtube_info_accurate" = "Accuracy of information on YouTube",
    "youtube_personalize" = "Belief YouTube personalizes",
    "youtube_vid_trust" = "Trust in YouTube videos",
    "youtube_vid_bias" = "Perception of YouTube bias"
  )
  
  filtered_estimates_df <- estimates_df %>% 
    filter(variable == youtube_var) %>% 
    mutate(varlevel = str_wrap_factor(factor(varlevel,
                            levels = levels(yg_youtube_factor[[youtube_var]])),
           width = 6),
           browser_sample = factor(browser_sample, levels = c("full", "browser")))
  
  filtered_estimates_df %>% 
    ggplot(aes(x = varlevel, y = percentage,
               ymin = ci_lwr, ymax = ci_upr,
               color = browser_sample)) +
    geom_point(size = 2,
               position = position_dodge(width = .3)) +
    geom_errorbar(position = position_dodge(width = .3),
                  width = 0) + 
    labs(x = "", y = "") +
    scale_y_continuous(limits = c(0, 80), 
                       labels = paste0(seq(0, 80, 20), "%"),
                       breaks = seq(0, 80, 20)) +
    facet_wrap( ~ variable,
                scales = "free_x",
                labeller = labeller(
                  variable = facet_labs
                )) +
    scale_color_manual(labels = c("browser" = "Extension sample",
                                  "full" = "Full sample"),
                       values = c("#F74846",
                                  "#28B0C4"),
                       name = "") +
    ggthemes::theme_gdocs() +
    theme(legend.position = 'bottom',
          legend.direction = 'horizontal',
          strip.text = element_text(size = 14, hjust = 1),
          strip.background =element_rect(fill="grey", color = "grey"),
          panel.grid.major.y = element_line(color = "grey95"),
          panel.grid.major.x = element_blank(),
          axis.text.x = element_text(size = 12),
          plot.background = element_rect(color = "white"))
}

youtube_attitudes_plots <- map(
  1:4, ~youtube_attitudes_plot(youtube_vars[[.x]])  +
    theme(legend.position = 'none')
)
common_legend <- get_legend(youtube_attitudes_plot(youtube_vars[[1]]) )

plot_grid(
  plot_grid(
    youtube_attitudes_plots[[1]],
    youtube_attitudes_plots[[2]],
    youtube_attitudes_plots[[3]],
    youtube_attitudes_plots[[4]],
    nrow = 2
  ),
  common_legend,
  ncol = 1,
  rel_heights = c(1, .1)
)
```

<img src="appendix-figures_files/figure-gfm/unnamed-chunk-14-1.png" style="display: block; margin: auto;" />

``` r
ggsave("appendix-figures_files/youtube_attitudes.png",
      dpi = 600, height = 6, width = 12 )
```

## Figure S14: Percentage of views to each channel type by video number within session

This repository does not contain session-level data in `merged.tsv`, but
code that reproduces session level figures are presented.

``` r
# need data where each observation is a youtube action
activity_youtube = read_delim(paste0(data_dir, "youtube/merged.tsv"),
                              delim = "\t")
# only sessions with videos in them
activity_youtube_video <- activity_youtube %>% 
  filter(!is.na(video_id))

# remove users not matched to surveys
restricted_sample <- activity_youtube_video %>% 
  filter(user_id %in% activity_data$user_id) %>% 
  left_join(activity_data %>%
              select(user_id, browser_sample, samplegroup, weight_cmd))

within_session_wtd <- restricted_sample %>% 
  group_by(user_id, yt_session) %>% 
  mutate(yt_n_video_in_session = n(),
         video_index = row_number(yt_session)) %>% 
  ungroup() %>% 
  group_by(video_index) %>%
  mutate(across(c("alternative_match", 
                  "extremist_match", 
                  "mainstream_match", 
                  "other_match"),
                ~.x * weight_cmd)) %>%
  mutate(
    total_video_index = n(),
    fraction_alternative = sum(alternative_match) / total_video_index,
    fraction_extremist = sum(extremist_match) / total_video_index,
    fraction_mainstream = sum(mainstream_match) / total_video_index,
    fraction_other = sum(other_match) / total_video_index
  ) %>% 
  select(video_index, total_video_index, starts_with("fraction_")) %>% 
  pivot_longer(-c("video_index", "total_video_index"),
               names_to = "channel_type", 
               values_to = "proportion") %>% 
  distinct()

video_index_rug <- restricted_sample %>% 
  group_by(user_id, yt_session) %>% 
  mutate(yt_n_video_in_session = n(),
         video_index = row_number(yt_session)) %>% 
  select(video_index)


within_session_wtd %>%
  filter(channel_type != "fraction_other") %>% 
  filter(video_index %in% 1:319) %>% # 99%
  ggplot(aes(x = video_index, 
             y = proportion,
             weight = total_video_index,
             #size = weight_video_index,
             color = channel_type) ) +
  geom_point(alpha = .2) +
  geom_rug(data = video_index_rug %>% filter(video_index %in% 1:319),
           aes(x = video_index, y = 0),
           alpha = .005,
           sides = "b",
           position = position_jitter(width = .15),
           inherit.aes = F) +
  geom_smooth(method = 'loess',
              formula = 'y~x',
              span = .5, # using about a third of data points as window
              # i.e. uses 30% closest points to a given observation for the fit
              aes(fill = channel_type),
              method.args = list(degree = 1),
              alpha = .3) +
  scale_y_continuous(limits = c(0, .1),
                     labels = paste0(seq(0, 10, by = 2), "%"),
                     breaks = seq(0, .1, by = .02)) +
  scale_color_manual(values = c("#FFA500", "#CD5C5C", "#015CB9"),
                     labels = c("Alternative\nchannels",
                                "Extremist\nchannels",
                                "Mainstream\nmedia"),
                     name = "") +
  scale_fill_manual(values = c("#FFA500", "#CD5C5C", "#015CB9"),
                    labels = c("Alternative\nchannels",
                               "Extremist\nchannels",
                               "Mainstream\nmedia"),
                    name = "") +
  labs(x = "Video index within session", 
       y = paste0("Estimated percentage of channel type") ) +
  theme_minimal() +
  theme(legend.position = 'bottom',
        axis.title.x = element_text(hjust = .95))

ggsave("./data/extension_sessions_proportions_channel_type.png",
      dpi = 600, height = 6, width = 12 )
ggsave("./data/extension_sessions_proportions_channel_type.pdf",
      dpi = 600, height = 6, width = 12 )
```

## Figure S15: Percentage of views to each channel type by total session length

``` r
user_sessions_at <- read_delim("./Data/extremism/user_summary_sessions_activity.tsv",
                            delim = '\t') 
`# merge sessions data with survey data
user_sessions_at_yg <- user_sessions_at %>% 
  left_join(activity_data %>% select(user_id, caseid_d34, weight_cmd)) %>% 
  filter(!is.na(weight_cmd))

user_sessions_at_yg %>%
  select(all_of(count_vars), yt_n_video, yt_window, weight_cmd) %>%
  mutate(across(c(all_of(count_vars)),
                ~.x*weight_cmd)) %>%
  group_by(yt_n_video) %>%
  summarize_at(.vars = all_of(count_vars),
               .funs = ~ (sum(.) / sum(yt_n_video*weight_cmd)) * 100) %>%
  pivot_longer(-yt_n_video) %>%
  filter(name != 'yt_n_video_other_all') %>%
  ggplot(aes(x = yt_n_video, y = value,
             color = name)) +
  geom_point(alpha = .5) +
  geom_smooth(method = 'loess',
              aes(fill = name),
              method.args = list(degree = 1),
              span = .5)+
  geom_rug(aes(x = yt_n_video,
               y = 0),
           alpha = .3,
           sides = "b",
           #position = position_jitter(width = .15),
           inherit.aes = F) +
  scale_color_manual(values = c("#FFA500", "#CD5C5C", "#015CB9"),
                     labels = c("Alternative\nchannels",
                                "Extremist\nchannels",
                                "Mainstream\nmedia"),
                     name = "") +
  scale_fill_manual(values = c("#FFA500", "#CD5C5C", "#015CB9"),
                     labels = c("Alternative\nchannels",
                                "Extremist\nchannels",
                                "Mainstream\nmedia"),
                     name = "") +
  labs(x = "Number of videos in session (session length)",
       y = 'Average percentage of channel videos viewed',
       caption = "") +
  coord_cartesian(xlim = c(0, session_length_threshold99)) +
  scale_y_continuous(limits = c(-1, 60),
                     breaks = seq(0, 60, 10),
                     labels = paste0(seq(0, 60, 10), "%") )

ggsave("appendix-figures_files/percent_visits_sessions_length99.png",
      dpi = 600, height = 6, width = 12 )
```

## Table S10: External referrers to alternative and extremist channel videos

(`referrers_data` is on a secure server and cannot be made public. See
`build.R` for how the dataframe was constructed and for a list of the
referrers.)

``` r
# external referrers table
total_external_referrers_by_channel_type_wtd <- referrers_data %>% 
  left_join(activity_data %>%
              select(user_id, weight_cmd),
            by = c('user_id_all'= 'user_id')) %>% 
  mutate(is_external_referrer = if_else(!str_detect(predecessor_url, "youtube.com"), 1,0 )) %>% 
  group_by(channel_type, is_external_referrer) %>% 
  summarise(n = sum(weight_cmd, na.rm = T)) %>% 
  filter(is_external_referrer == 1 & channel_type %in% c("alternative", "extremist"))

# calculate proportions
external_referrers_by_domain <- referrers_data %>% 
  left_join(activity_data %>%
              select(user_id, weight_cmd),
            by = c('user_id_all'= 'user_id')) %>% 
  mutate(external_referrer_group = case_when(predecessor_domain %in% external_referrers$main_social ~ "Mainstream social",
                                             predecessor_domain %in% external_referrers$alternative_social ~ "Alternative social",
                                             predecessor_domain %in% external_referrers$search_engine ~ "Search engine social",
                                             predecessor_domain %in% external_referrers$webmail ~ "Webmail",
                                             TRUE ~ NA_character_)) %>%
  filter(!is.na(external_referrer_group) ) %>% 
  group_by(predecessor_domain, channel_type) %>% 
  summarise(n = sum(weight_cmd, na.rm = T)) %>% 
  filter(channel_type %in% c("alternative", "extremist")) %>% 
  mutate(n_external_alternative = total_external_referrers_by_channel_type_wtd[total_external_referrers_by_channel_type_wtd$channel_type == "alternative", ]$n,
         n_external_extremist = total_external_referrers_by_channel_type_wtd[total_external_referrers_by_channel_type_wtd$channel_type == "extremist", ]$n,
         percent = if_else(channel_type == "alternative" ,
                        100*(n/n_external_alternative),
                        100*(n/n_external_extremist))) %>% 
  pivot_wider(names_from = "channel_type",
              values_from = c("percent", "n") ) %>% 
  select(-starts_with("n_")) %>% 
  mutate(across(c("percent_extremist", "percent_alternative"), ~ replace_na(., 0)))

#clean up/format table
external_referrers_by_domain_table <- external_referrers_by_domain %>% 
  mutate(external_referrer_group = case_when(predecessor_domain %in% external_referrers$main_social ~ "Mainstream social",
                                             predecessor_domain %in% external_referrers$alternative_social ~ "Alternative social",
                                             predecessor_domain %in% external_referrers$search_engine ~ "Search engine social",
                                             predecessor_domain %in% external_referrers$webmail ~ "Webmail",
                                             TRUE ~ NA_character_)) %>% 
  mutate(percent_extremist = round(percent_extremist, 3),
         percent_alternative = round(percent_alternative, 3))%>% 
  select(`Referrer type` = external_referrer_group,
         `Preceding domain` = predecessor_domain,
         `Percent to extremist channel` = percent_extremist,
         `Percent to alternative channel` = percent_alternative) %>% 
  arrange(`Referrer type`, `Percent to extremist channel`)

# print out
external_referrers_by_domain_table %>%
  kbl(format = "html",
      digits = 5,
      booktabs = TRUE) 

# save as TEX file
external_referrers_by_domain_table %>%
  kbl(format = "latex",
      digits = 3,
      booktabs = TRUE) %>% 
  readr::write_lines(file = "./appendix-figures_files/external_referrers_by_channel_type_wtd.tex")
```

## Figure S16: Concentration of exposure to alternative and extremist channels (view counts)

``` r
cumsum_fxn <- function (var, data) {
  channel_type <-
      str_replace(str_extract(var, "[a-z]+_all$"), "_all", "")
  data %>%
    arrange(desc(.data[[var]])) %>%
    mutate(
      users = weight_cmd / sum(.$weight_cmd, na.rm = T),
      views = (.data[[var]] / sum(.data[[var]], na.rm = T)),
      cum_user = cumsum(users),
      cum_views = cumsum(views),
      ln_cum_user = log10(cumsum(users)),
      ln_cum_views = log10(cumsum(views)),
      source = channel_type
    ) %>%
    select(cum_user, cum_views, ln_cum_user, source, caseid, weight_cmd)
}

lab_stat <-
  cumsum_fxn('activity_yt_n_video_alternative_all', activity_data) %>%
  filter(round(cum_views, 2) == .80) %>%
  pull(ln_cum_user)
at_counts <- paste0("activity_yt_n_video_",
                    channel_types,
                    "_all")
at_cum_count_user <- bind_rows(map(at_counts, 
                                   ~ cumsum_fxn(.x, activity_data)))

count_cumsum_plot_inset <- at_cum_count_user %>%
  ggplot(aes(x = ln_cum_user, y = cum_views, color = source)) +
  geom_line(size = 2) +
  geom_point(
    aes(x = lab_stat, y = .8),
    color = 'black',
    shape = 4,
    stroke = 1,
    size = 1.5
  ) +
  scale_y_continuous(labels = scales::percent) +
  #scale_x_continuous(labels = scales::percent) +
  scale_color_manual(
    name = "",
    labels = c(
      "Alternative\nchannels",
      "Extremist\nchannels",
      "Mainstream media\nchannels",
      "Other\nchannels"
    ),
    values = c("#FFA500", "#CD5C5C", "#015CB9", "#E3E6E6")
  ) +
  geom_segment(aes(
    x = -4,
    xend = lab_stat,
    y = .8,
    yend = .8
  ),
  lty = 2,
  color = 'black') +
  geom_segment(aes(
    x = lab_stat,
    xend = lab_stat,
    y = 0,
    yend = .8
  ),
  lty = 2,
  color = 'black') +
  geom_curve(
    aes(
      x = -2.5,
      xend = lab_stat,
      y = .9,
      yend = .8
    ),
    #arrow = arrow(type = "closed", length = unit(0.03, "npc")),
    curvature = 0.25,
    angle = 35,
    color = 'black'
  ) +
  scale_x_continuous(
    limits = c(-4, 0),
    breaks = seq(-4, 0,
                 by = 1),
    labels = paste0((round(10 ^ (
      seq(-4, 0,
          by = 1)
    ), 3)) * 100, "%")
  ) +
  annotate(
    "text",
    x = -3,
    y = .95,
    label = str_wrap(paste0(
      format(round(10^(lab_stat), 3) * 100, nsmall = 1),
      "% of users account for 80% of visits to alternative channel videos."
    ), width = 45),
    size = 2.5
  ) +
  labs(x = expression(log[10]~ "(Percentage of users)"), 
       y = "Percentage of total exposure (views)") +
  theme_minimal() +
  #title = "Total percent alternative/extremist videos watched by user percentile"
  theme(
    plot.background = element_blank(),
    legend.position = 'bottom',
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10)
  );count_cumsum_plot_inset
```

<img src="appendix-figures_files/figure-gfm/ecdf visits-1.png" style="display: block; margin: auto;" />

``` r
count_cumsum_plot_zoomout <- at_cum_count_user %>%
  ggplot(aes(x = cum_user, y = cum_views, color = source)) +
  geom_line(size = 2) +
  scale_y_continuous(labels = scales::percent) +
  scale_x_continuous(labels = scales::percent) +
  scale_color_manual(
    name = "",
    labels = c(
      "Alternative\nchannels",
      "Extremist\nchannels",
      "Mainstream media\nchannels",
      "Other\nchannels"
    ),
    values = c("#FFA500", "#CD5C5C", "#015CB9", "#E3E6E6")
  ) +
  annotate(geom = "rect", col = "black", fill = "white",
           lwd = 2,
           ymin = 0, ymax = .87, xmin = .28, xmax = 1) +
  labs(x = "Percentage of users", y = "Percentage of total exposure (views)") +
  theme_minimal() +
  #title = "Total percent alternative/extremist videos watched by user percentile"
  theme(
    plot.background = element_blank(),
    legend.position = 'bottom',
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )


count_cumsum_plot_zoomout +
  patchwork::inset_element(count_cumsum_plot_inset +
                             guides(color = "none"), 
                           left = 0.3, bottom = 0.05, right = .95, top = .85,
                           on_top = TRUE)
```

<img src="appendix-figures_files/figure-gfm/ecdf visits-2.png" style="display: block; margin: auto;" />

``` r
ggsave(
  "appendix-figures_files/cdf_users_visit_exposure.pdf",
  width = 8.5,
  height = 5.5
)
```
